Package org.dspace.app.statistics

Source Code of org.dspace.app.statistics.ReportGenerator

/*
* ReportGenerator.java
*
* Version: $Revision: 3734 $
*
* Date: $Date: 2009-04-24 04:00:19 +0000 (Fri, 24 Apr 2009) $
*
* Copyright (c) 2002-2009, The DSpace Foundation.  All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the DSpace Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/

package org.dspace.app.statistics;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.dspace.content.DCValue;
import org.dspace.content.Item;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Context;
import org.dspace.handle.HandleManager;

/**
* This class performs the action of coordinating a usage report being
* generated using the standard internal aggregation file format as a basis.
* All it's configuration information must come from that file.  There is the
* opportunity for different output format options such as HTML.
*
* Use the -help flag for more information
*
* @author  Richard Jones
*/
public class ReportGenerator
{
    // set up our class globals
   
    /////////////////
    // aggregators
    /////////////////
   
    /** aggregator for all actions performed in the system */
    private static Map actionAggregator;
   
    /** aggregator for all searches performed */
    private static Map searchAggregator;
   
    /** aggregator for user logins */
    private static Map userAggregator;
   
    /** aggregator for item views */
    private static Map itemAggregator;
   
    /** aggregator for current archive state statistics */
    private static Map archiveStats;
   
   
    //////////////////
    // statistics config data
    //////////////////
   
    /** bottom limit to output for search word analysis */
    private static int searchFloor;
   
    /** bottom limit to output for item view analysis */
    private static int itemFloor;
   
    /** number of items from most popular to be looked up in the database */
    private static int itemLookup;
   
    /** mode to use for user email display */
    private static String userEmail;
   
    /** URL of the service being analysed */
    private static String url;
   
    /** Name of the service being analysed */
    private static String name;

    /** average number of views per item */
    private static int avgItemViews;
   
    /** name of the server being analysed */
    private static String serverName;
   
    /** start date of this report */
    private static Date startDate = null;
   
    /** end date of this report */
    private static Date endDate = null;
   
    /** the time taken to build the aggregation file from the log */
    private static int processTime;
 
    /** the number of log lines analysed */
    private static int logLines;
   
    /** the number of warnings encountered */
    private static int warnings;
   
    /** the list of results to be displayed in the general summary */
    private static List generalSummary;
   
    //////////////////
    // regular expressions
    //////////////////
   
    /** pattern that matches an unqualified aggregator property */
    private static Pattern real = Pattern.compile("^(.+)=(.+)");
   
    //////////////////////////
   // Miscellaneous variables
   //////////////////////////
  
   /** process timing clock */
   private static Calendar startTime = null;
  
   /** a map from log file action to human readable action */
   private static Map actionMap = null;
  
    /////////////////
    // report generator config data
    ////////////////
   
    /** the input file to build the report from */
    private static String input = null;
   
   /** the log file action to human readable action map */
   private static String map = ConfigurationManager.getProperty("dspace.dir") +
                            File.separator + "config" + File.separator + "dstat.map";
  
  
    /**
     * main method to be run from command line.  See usage information for
     * details as to how to use the command line flags
     */
    public static void main(String [] argv)
        throws Exception, SQLException
    {
        // create context as super user
        Context context = new Context();
        context.setIgnoreAuthorization(true);
       
        String myFormat = null;
        String myInput = null;
        String myOutput = null;
        String myMap = null;
       
        // read in our command line options
        for (int i = 0; i < argv.length; i++)
        {
            if (argv[i].equals("-format"))
            {
                myFormat = argv[i+1].toLowerCase();
            }
           
            if (argv[i].equals("-in"))
            {
                myInput = argv[i+1];
            }
           
            if (argv[i].equals("-out"))
            {
                myOutput = argv[i+1];
            }
           
            if (argv[i].equals("-map"))
            {
                myMap = argv[i+1];
            }
           
            if (argv[i].equals("-help"))
            {
                usage();
                System.exit(0);
            }
        }

        processReport(context, myFormat, myInput, myOutput, myMap);
    }
   
    /**
     * using the pre-configuration information passed here, read in the
     * aggregation data and output a file containing the report in the
     * requested format
     *
     * this method is retained for backwards compatibility, but delegates the actual
     * wprk to a new method
     *
     * @param   context     the DSpace context in which this action is performed
     * @param   myFormat    the desired output format (currently on HTML supported)
     * @param   myInput     the aggregation file to be turned into a report
     * @param   myOutput    the file into which to write the report
     */
    public static void processReport(Context context, String myFormat,
                                     String myInput, String myOutput,
                                     String myMap)
        throws Exception, SQLException
    {
        // create the relevant report type
        // FIXME: at the moment we only support HTML report generation
        Report report = null;
        if (myFormat.equals("html"))
        {
            report = new HTMLReport();
            ((HTMLReport)report).setOutput(myOutput);
        }

        if (myMap != null)
        {
            map = myMap;
        }

        ReportGenerator.processReport(context, report, myInput);
    }

    /**
     * using the pre-configuration information passed here, read in the
     * aggregation data and output a file containing the report in the
     * requested format
     */
    public static void processReport(Context context, Report report,
                                     String myInput)
        throws Exception, SQLException
    {
        startTime = new GregorianCalendar();
            
        /** instantiate aggregators */
        actionAggregator = new HashMap();
        searchAggregator = new HashMap();
        userAggregator = new HashMap();
        itemAggregator = new HashMap();
        archiveStats = new HashMap();
        actionMap = new HashMap();
       
        /** instantite lists */
        generalSummary = new ArrayList();
               
        // set the parameters for this analysis
        setParameters(myInput);
       
        // pre prepare our standard file readers and buffered readers
        FileReader fr = null;
        BufferedReader br = null;
       
        // read the input file
        readInput(input);
       
        // load the log file action to human readable action map
        readMap(map);
       
        report.setStartDate(startDate);
        report.setEndDate(endDate);
        report.setMainTitle(name, serverName);
       
        // define our standard variables for re-use
        // FIXME: we probably don't need these once we've finished re-factoring
        Iterator keys = null;
        int i = 0;
        String explanation = null;
        int value;
       
        // FIXME: All of these sections should probably be buried in their own
        // custom methods
       
        Statistics overview = new Statistics();
       
        overview.setSectionHeader("General Overview");
       
        Iterator summaryEntries = generalSummary.iterator();
        while (summaryEntries.hasNext())
        {
            String entry = (String) summaryEntries.next();
            if (actionAggregator.containsKey(entry))
            {
                int count = Integer.parseInt((String) actionAggregator.get(entry));
                overview.add(new Stat(translate(entry), count));
            }
        }
       
        report.addBlock(overview);
       
        // prepare the archive statistics package
        if (archiveStats.size() > 0)
        {
            Statistics archiveInfo = prepareStats(archiveStats, true, false);
            archiveInfo.setSectionHeader("Archive Information");
            archiveInfo.setStatName("Content Type");
            archiveInfo.setResultName("Number of items");
       
            report.addBlock(archiveInfo);
        }
       
       
       
        // process the items in preparation to be displayed.  This includes sorting
        // by view number, building the links, and getting further info where
        // necessary
        Statistics viewedItems = new Statistics("Item/Handle", "Number of views", itemFloor);
        viewedItems.setSectionHeader("Items Viewed");
       
        Stat[] items = new Stat[itemAggregator.size()];
       
        keys = itemAggregator.keySet().iterator();
        i = 0;
        while (keys.hasNext())
        {
            String key = (String) keys.next();
            String link = url + "handle/" + key;
            value = Integer.parseInt((String) itemAggregator.get(key));
            items[i] = new Stat(key, value, link);
            i++;
        }
       
        Arrays.sort(items);
       
        String info = null;
        for (i = 0; i < items.length; i++)
        {
            // Allow negative value to say that all items should be looked up
            if (itemLookup < 0 || i < itemLookup)
            {
                info = getItemInfo(context, items[i].getKey());
            }
              
            // if we get something back from the db then set it as the key,
            // else just use the link
            if (info != null)
            {
                items[i].setKey(info  + " (" + items[i].getKey() + ")");
            }
            else
            {
                items[i].setKey(items[i].getReference());
            }
           
            // reset the info register
            info = null;
        }
       
        viewedItems.add(items);
       
        report.addBlock(viewedItems);
       
        // prepare a report of the full action statistics
        Statistics fullInfo = prepareStats(actionAggregator, true, true);
        fullInfo.setSectionHeader("All Actions Performed");
        fullInfo.setStatName("Action");
        fullInfo.setResultName("Number of times");
       
        report.addBlock(fullInfo);
       
        // prepare the user login statistics package
        if (!userEmail.equals("off"))
        {
            Statistics userLogins = prepareStats(userAggregator, true, false);
            userLogins.setSectionHeader("User Logins");
            userLogins.setStatName("User");
            userLogins.setResultName("Number of logins");
            if (userEmail.equals("alias"))
            {
                explanation = "(distinct addresses)";
                userLogins.setExplanation(explanation);
            }
       
            report.addBlock(userLogins);
        }

        // prepare the search word statistics package
        Statistics searchWords = prepareStats(searchAggregator, true, false);
        searchWords.setSectionHeader("Words Searched");
        searchWords.setStatName("Word");
        searchWords.setResultName("Number of searches");
        searchWords.setFloor(searchFloor);
       
        report.addBlock(searchWords);
       
        // FIXME: because this isn't an aggregator it can't be passed to
        // prepareStats; should we overload this method for use with this kind
        // of data?
        // prepare the average item views statistics
        if (avgItemViews > 0)
        {
            Statistics avg = new Statistics();
            avg.setSectionHeader("Averaging Information");

            Stat[] average = new Stat[1];
       
            average[0] = new Stat("Average views per item", avgItemViews);
            avg.add(average);
            report.addBlock(avg);
        }
     
       
        // prepare the log line level statistics
        // FIXME: at the moment we only know about warnings, but future versions
        // should aggregate all log line levels and display here
        Statistics levels = new Statistics("Level", "Number of lines");
        levels.setSectionHeader("Log Level Information");
       
        Stat[] level = new Stat[1];
        level[0] = new Stat("Warnings", warnings);
       
        levels.add(level);
       
        report.addBlock(levels);
       
        // get the display processing time information
        Calendar endTime = new GregorianCalendar();
        long timeInMillis = (endTime.getTimeInMillis() - startTime.getTimeInMillis());
        int outputProcessTime = (new Long(timeInMillis).intValue() / 1000);
       
        // prepare the processing information statistics
        Statistics process = new Statistics("Operation", "");
        process.setSectionHeader("Processing Information");
       
        Stat[] proc = new Stat[3];
        proc[0] = new Stat("Log Processing Time", processTime);
        proc[0].setUnits("seconds");
        proc[1] = new Stat("Output Processing Time", outputProcessTime);
        proc[1].setUnits("seconds");
        proc[2] = new Stat("Log File Lines Analysed", logLines);
        proc[2].setUnits("lines");
       
        process.add(proc);
       
        report.addBlock(process);

        report.render();
       
        return;
    }
   
   
    /**
     * a standard stats block preparation method for use when an aggregator
     * has to be put out in its entirity.  This method will not be able to
     * deal with complex cases, although it will perform sorting by value and
     * translations as per the map file if requested
     *
     * @param   aggregator      the aggregator that should be converted
     * @param   sort            should the resulting stats be sorted by value
     * @param   translate       translate the stat name using the map file
     *
     * @return      a Statistics object containing all the relevant information
     */
    public static Statistics prepareStats(Map aggregator, boolean sort, boolean translate)
    {
        Stat[] stats = new Stat[aggregator.size()];
        if (aggregator.size() > 0)
        {
            Iterator keys = aggregator.keySet().iterator();
            int i = 0;
            while (keys.hasNext())
            {
                String key = (String) keys.next();
                int value = Integer.parseInt((String) aggregator.get(key));
                if (translate)
                {
                    stats[i] = new Stat(translate(key), value);
                }
                else
                {
                    stats[i] = new Stat(key, value);
                }
                i++;
            }
           
            if (sort)
            {
                Arrays.sort(stats);
            }
        }
       
        // add the results to the statistics object
        Statistics statistics = new Statistics();
        statistics.add(stats);
       
        return statistics;
    }
   
   
    /**
     * look the given text up in the action map table and return a translated
     * value if one exists.  If no translation exists the original text is
     * returned
     *
     * @param   text    the text to be translated
     *
     * @return      a string containing either the translated text or the original
     *              text
     */
    public static String translate(String text)
    {
        if (actionMap.containsKey(text))
        {
            return (String) actionMap.get(text);
        }
        else
        {
            return text;
        }
    }
   
   
    /**
     * read in the action map file which converts log file line actions into
     * actions which are more understandable to humans
     *
     * @param   map     the map file
     */
    public static void readMap(String map)
        throws IOException
    {
        FileReader fr = null;
        BufferedReader br = null;

        try
        {
            // read in the map file, printing a warning if none is found
            String record = null;
            try
            {
                fr = new FileReader(map);
                br = new BufferedReader(fr);
            }
            catch (IOException e)
            {
                System.err.println("Failed to read map file: log file actions will be displayed without translation");
                return;
            }

            // loop through the map file and read in the values
            while ((record = br.readLine()) != null)
            {
                Matcher matchReal = real.matcher(record);

                // if the line is real then read it in
                if (matchReal.matches())
                {
                    actionMap.put(matchReal.group(1).trim(), matchReal.group(2).trim());
                }
            }
        }
        finally
        {
            if (br != null)
                try { br.close(); } catch (IOException ioe) { }

            if (fr != null)
                try { fr.close(); } catch (IOException ioe) { }
        }
    }
   
   
    /**
     * set the passed parameters up as global class variables.  This has to
     * be done in a separate method because the API permits for running from
     * the command line with args or calling the processReport method statically
     * from elsewhere
     *
     * @param   myInput     regex for log file names
     */
    public static void setParameters(String myInput)
    {
        if (myInput != null)
        {
            input = myInput;
        }
       
        return;
    }
   
   
    /**
     * read the input file and populate all the class globals with the contents
     * The values that come from this file form the basis of the analysis report
     *
     * @param   input   the aggregator file
     */
    public static void readInput(String input)
        throws IOException, ParseException
    {
        FileReader fr = null;
        BufferedReader br = null;
       
        // read in the analysis information, throwing an error if we fail to open
        // the given file
        String record = null;
        try
       
            fr = new FileReader(input);
            br = new BufferedReader(fr);
        }
        catch (IOException e)
        { 
            System.out.println("Failed to read input file: " + input);
            return;
        }
       
        // first initialise a date format object to do our date processing
        // if necessary
        SimpleDateFormat sdf = new SimpleDateFormat("dd'/'MM'/'yyyy");

        // FIXME: although this works, it is not very elegant
        // loop through the aggregator file and read in the values
        while ((record = br.readLine()) != null)
        {
            // match real lines
            Matcher matchReal = real.matcher(record);
           
            // pre-prepare our input strings
            String section = null;
            String key = null;
            String value = null;
           
            // temporary string to hold the left hand side of the equation
            String left = null;
           
            // match the line or skip this record
            if (matchReal.matches())
            {
                // lift the values out of the matcher's result groups
                left = matchReal.group(1).trim();
                value = matchReal.group(2).trim();
               
                // now analyse the left hand side, splitting by ".", taking the
                // first token as the section and the remainder of the string
                // as they key if it exists
                StringTokenizer tokens = new StringTokenizer(left, ".");
                int numTokens = tokens.countTokens();
                if (tokens.hasMoreTokens())
                {
                    section = tokens.nextToken();
                    if (numTokens > 1)
                    {
                        key = left.substring(section.length() + 1);
                    }
                    else
                    {
                        key = "";
                    }
                }
            }
            else
            {
                continue;
            }
           
            // if the line is real, then we carry on
            // read the analysis contents in
            if (section.equals("archive"))
            {
                archiveStats.put(key, value);
            }
            else if (section.equals("action"))
            {
                actionAggregator.put(key, value);
            }
            else if (section.equals("user"))
            {
                userAggregator.put(key, value);
            }
            else if (section.equals("search"))
            {
                searchAggregator.put(key, value);
            }
            else if (section.equals("item"))
            {
                itemAggregator.put(key, value);
            }
            else if (section.equals("user_email"))
            {
                userEmail = value;
            }
            else if (section.equals("item_floor"))
            {
                itemFloor = Integer.parseInt(value);
            }
            else if (section.equals("search_floor"))
            {
                searchFloor = Integer.parseInt(value);
            }
            else if (section.equals("host_url"))
            {
                url = value;
            }
            else if (section.equals("item_lookup"))
            {
                itemLookup = Integer.parseInt(value);
            }
            else if (section.equals("avg_item_views"))
            {
                try
                {
                    avgItemViews = Integer.parseInt(value);
                }
                catch (NumberFormatException e)
                {
                    avgItemViews = 0;
                }
            }
            else if (section.equals("server_name"))
            {
                serverName = value;
            }
            else if (section.equals("service_name"))
            {
                name = value;
            }
            else if (section.equals("start_date"))
            {
                startDate = sdf.parse(value);
            }
            else if (section.equals("end_date"))
            {
                endDate = sdf.parse(value);
            }
            else if (section.equals("analysis_process_time"))
            {
                processTime = Integer.parseInt(value);
            }
            else if (section.equals("general_summary"))
            {
                generalSummary.add(value);
            }
            else if (section.equals("log_lines"))
            {
                logLines = Integer.parseInt(value);
            }
            else if (section.equals("warnings"))
            {
                warnings = Integer.parseInt(value);
            }
        }

        // close the inputs
        br.close();
        fr.close();
    }
   
    /**
     * get the information for the item with the given handle
     *
     * @param   context     the DSpace context we are operating under
     * @param   handle      the handle of the item being looked up, in the form
     *                      1234/567 and so forth
     *
     * @return      a string containing a reference (almost citation) to the
     *              article
     */
    public static String getItemInfo(Context context, String handle)
        throws SQLException
    {
        Item item = null;
       
        // ensure that the handle exists
        try
        {
            item = (Item) HandleManager.resolveToObject(context, handle);
        }
        catch (Exception e)
        {
            return null;
        }
       
        // if no handle that matches is found then also return null
        if (item == null)
        {
            return null;
        }
       
        // build the referece
        // FIXME: here we have blurred the line between content and presentation
        // and it should probably be un-blurred
        DCValue[] title = item.getDC("title", null, Item.ANY);
        DCValue[] author = item.getDC("contributor", "author", Item.ANY);
       
        StringBuffer authors = new StringBuffer();
        if (author.length > 0)
        {
            authors.append("(" + author[0].value);
        }
        if (author.length > 1)
        {
            authors.append(" et al");
        }
        if (author.length > 0)
        {
           authors.append(")");
        }
       
        String content = title[0].value + " " + authors.toString();
       
        return content;
    }
   
   
    /**
     * output the usage information to the terminal
     */
    public static void usage()
    {
        String usage = "Usage Information:\n" +
                        "ReportGenerator [options [parameters]]\n" +
                        "-format [output format]\n" +
                            "\tRequired\n" +
                            "\tSpecify the format that you would like the output in\n" +
                            "\tOptions:\n" +
                            "\t\thtml\n" +
                        "-in [aggregation file]\n" +
                            "\tRequired\n" +
                            "\tSpecify the aggregation data file to display\n" +
                        "-out [output file]\n" +
                            "\tOptional\n" +
                            "\tSpecify the file to output the report to\n" +
                            "\tDefault uses [dspace log directory]/report\n" +
                        "-map [map file]\n" +
                            "\tOptional\n" +
                            "\tSpecify the map file to translate log file actions into human readable actions\n" +
                            "\tDefault uses [dspace config directory]/dstat.map\n" +
                        "-help\n" +
                            "\tdisplay this usage information\n";
       
        System.out.println(usage);
    }
}
TOP

Related Classes of org.dspace.app.statistics.ReportGenerator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.