// Package edu.uga.cs.fluxbuster.features
// Source code of edu.uga.cs.fluxbuster.features.FeatureCalculator

/*
* Copyright (C) 2012 Chris Neasbitt
* Author: Chris Neasbitt
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package edu.uga.cs.fluxbuster.features;

import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.Formatter;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;
import org.joda.time.Days;

import edu.uga.cs.fluxbuster.db.DBInterface;
import edu.uga.cs.fluxbuster.db.DBInterfaceFactory;
import edu.uga.cs.fluxbuster.utils.PropertiesUtils;

/**
 * This class calculates longitudinal features of each cluster and
 * stores them in the database.
 *
 * <p>Feature queries are read as format-string templates from
 * FeatureCalculator.properties (see the *KEY constants below) and
 * executed through a {@link DBInterface}.
 *
 * @author Chris Neasbitt
 */
public class FeatureCalculator {

  // Database interface through which all feature queries are executed.
  private DBInterface dbi = null;
 
  // Query templates and window configuration loaded from
  // FeatureCalculator.properties; looked up via the *KEY constants.
  private Properties properties = null;
 
  // Formats dates as yyyyMMdd for table-name suffixes and query arguments.
  // NOTE(review): SimpleDateFormat is not thread-safe; this class appears to
  // assume single-threaded use — confirm before sharing an instance.
  private SimpleDateFormat df = null;
 
  // Cache of the most recent getPrevDates() result, valid for the run date
  // and window recorded in the two fields below (see getPrevDates).
  private ArrayList<Date> prevDateBuf = null;
  private Date prevDateBufDate = null;
  private int prevDateBufWindow = 0;
   
  // Property key: query listing existing table names.
  private static final String TABLES_QUERY1KEY = "TABLES_QUERY1";
 
  // Property keys: per-date table name prefixes.
  private static final String DOMAINSPREFIXKEY = "DOMAINS_TABLE_PREFIX";
 
  private static final String RESIPSPREFIXKEY = "RESIPS_TABLE_PREFIX";
 
  // Property keys: novelty-feature query parts and configuration.
  private static final String NOVELTY_QUERY1_1KEY = "NOVELTY_QUERY1_PART1";
 
  private static final String NOVELTY_QUERY1_2KEY = "NOVELTY_QUERY1_PART2";
 
  private static final String NOVELTY_QUERY1_3KEY = "NOVELTY_QUERY1_PART3";
 
  private static final String NOVELTY_QUERY2KEY = "NOVELTY_QUERY2";
 
  private static final String NOVELTY_WINDOWSKEY = "NOVELTY_WINDOWS";
 
  private static final String NOVELTY_WINFIELDSKEY = "NOVELTY_WINDOW_FIELDS";
 
  private static final String NOVELTY_QUERY3KEY = "NOVELTY_QUERY3";
 
  // Property keys: previous-cluster-ratio queries and window.
  private static final String PREVCLUSTER_QUERY1KEY = "PREVCLUSTER_QUERY1";
 
  private static final String PREVCLUSTER_QUERY2KEY = "PREVCLUSTER_QUERY2";
 
  private static final String PREVCLUSTER_QUERY3KEY = "PREVCLUSTER_QUERY3";
 
  private static final String PREVCLUSTER_QUERY4KEY = "PREVCLUSTER_QUERY4";
 
  private static final String PREVCLUSTER_WINDOWKEY = "PREVCLUSTER_WINDOW";
 
  // Property keys: domains-per-network queries and window.
  private static final String DOMAINSPERNETWORK_WINDOWKEY = "DOMAINSPERNETWORK_WINDOW";
 
  private static final String DOMAINSPERNETWORK_QUERY1KEY = "DOMAINSPERNETWORK_QUERY1";
 
  private static final String DOMAINSPERNETWORK_QUERY2KEY = "DOMAINSPERNETWORK_QUERY2";
 
  private static final String DOMAINSPERNETWORK_QUERY3KEY = "DOMAINSPERNETWORK_QUERY3";
 
  private static Log log = LogFactory.getLog(FeatureCalculator.class);

  /**
   * Instantiates a new feature calculator using the default database
   * interface obtained from {@link DBInterfaceFactory}.
   *
   * @throws IOException if the FeatureCalculator.properties file
   *     can not be read
   */
  public FeatureCalculator() throws IOException {
    this(DBInterfaceFactory.loadDBInterface());
  }
 
  /**
   * Instantiates a new feature calculator with a specific database
   * interface.
   *
   * @param dbi the database interface
   * @throws IOException if the FeatureCalculator.properties file
   *     can not be read
   */
  public FeatureCalculator(DBInterface dbi) throws IOException {
    this.dbi = dbi;
    properties = PropertiesUtils.loadProperties(this.getClass());
    df = new SimpleDateFormat("yyyyMMdd");
  }
 
  /**
   * Calculates the domains per network feature for each cluster generated
   * on a specific run date.
   *
   * @param log_date the run date
   * @param window the number of days previous to use in feature calculation
   * @return a table of values where the keys are cluster ids and the values
   *     are the feature values
   * @throws SQLException if there is an error calculating the feature values
   */
 
  public Map<Integer, Double> calculateDomainsPerNetwork(Date log_date,
      int window) throws SQLException{
    HashMap<Integer, Double> retval = new HashMap<Integer, Double>();
    ArrayList<Date> prevDates = getPrevDates(log_date, window);
    if (prevDates.size() > 0) {
      String logDateStr = df.format(log_date);
      StringBuffer add_query = new StringBuffer();
      Formatter formatter = new Formatter(add_query);
     
      for(Date prevDate : prevDates){
        String prevDateStr = df.format(prevDate);
        formatter.format(" " + properties.getProperty(DOMAINSPERNETWORK_QUERY1KEY) + " ",
            logDateStr, prevDateStr, prevDateStr);
      }
      formatter.close();
     
      StringBuffer querybuf = new StringBuffer();
      formatter = new Formatter(querybuf);
      formatter.format(properties.getProperty(DOMAINSPERNETWORK_QUERY2KEY),
          logDateStr, logDateStr, logDateStr,add_query.toString());
      ResultSet rs = null;
      try{
        rs = dbi.executeQueryWithResult(querybuf.toString());   
        while(rs.next()){
          retval.put(rs.getInt(1), rs.getDouble(2));
        }
      } catch (Exception e) {
        if(log.isErrorEnabled()){
          log.error(e);
        }
      } finally {
        if(rs != null && !rs.isClosed()){
          rs.close();
        }
        formatter.close();
      }
    }
    return retval;
  }
 
  /**
   * Calculates the cluster novelty feature for each cluster generated
   * on a specific run date.  The feature is, per cluster, the mean over
   * its domains of (IPs newly resolved on log_date) / (max age in days of
   * the domain's prior appearances within the window).
   *
   * @param log_date the run date
   * @param window the number of days previous to use in feature calculation
   * @return a table of values where the keys are cluster ids and the values
   *     are the feature values
   * @throws SQLException if there is an error calculating the feature values
   */
  public Map<Integer, Double> calculateNoveltyFeature(Date log_date,
      int window) throws SQLException {
    HashMap<Integer, Double> retval = new HashMap<Integer, Double>();
    ArrayList<Date> prevDates = getPrevDates(log_date, window);

    if (prevDates.size() > 0) {
      // Assemble the new-resolved-IPs query: part 1 and part 3 take the
      // current date; part 2 is repeated once per previous run date.
      StringBuffer querybuf = new StringBuffer();
      Formatter formatter = new Formatter(querybuf);
      String curdatestr = df.format(log_date);
      formatter.format(properties.getProperty(NOVELTY_QUERY1_1KEY),
          curdatestr, curdatestr, curdatestr, curdatestr);
      for (Date prevDate : prevDates) {
        formatter
            .format(" "
                + properties.getProperty(NOVELTY_QUERY1_2KEY)
                + " ", df.format(prevDate));
      }
      formatter.format(properties.getProperty(NOVELTY_QUERY1_3KEY),
          curdatestr, curdatestr);

      // cluster id -> (second-level domain -> number of IPs newly resolved
      // on log_date).  Columns: 1 = sldn, 2 = cluster id, 3 = new IP count.
      ResultSet rs2 = null;
      Hashtable<Integer, Hashtable<String, Long>> new_resolved_ips
        = new Hashtable<Integer, Hashtable<String, Long>>();
      try{
        rs2 = dbi.executeQueryWithResult(querybuf.toString());
        while (rs2.next()) {
          int cluster_id = rs2.getInt(2);
          if (!new_resolved_ips.containsKey(cluster_id)) {
            new_resolved_ips.put(cluster_id,
                new Hashtable<String, Long>());
          }
          String secondLevelDomainName = rs2.getString(1);
          long newips = rs2.getLong(3);
          Hashtable<String, Long> clustertable = new_resolved_ips
              .get(cluster_id);
          clustertable.put(secondLevelDomainName, newips);
        }
      } catch(Exception e) {
        // Errors are logged and whatever was collected so far is used;
        // the method does not rethrow here.
        if(log.isErrorEnabled()){
          log.error(e);
        }
      } finally{
        if(rs2 != null && !rs2.isClosed()){
          rs2.close();
        }
        formatter.close();
      }
     

      // second-level domain -> day gaps between each prior appearance of
      // the domain (one query per previous date) and log_date.
      Hashtable<String, List<Integer>> numDays = new Hashtable<String, List<Integer>>();
      for (Date prevDate : prevDates) {
        String prevDateStr = df.format(prevDate);
        querybuf = new StringBuffer();
        formatter = new Formatter(querybuf);
        formatter.format(properties.getProperty(NOVELTY_QUERY2KEY),
            curdatestr, prevDateStr, curdatestr, prevDateStr);
        ResultSet rs3 = null;
        try{
          rs3 = dbi.executeQueryWithResult(querybuf.toString());
          while (rs3.next()) {
            String sldn = rs3.getString(1);
            if (!numDays.containsKey(sldn)) {
              numDays.put(sldn, new ArrayList<Integer>());
            }
            // Age of this prior appearance in whole days before log_date.
            Date pd = rs3.getDate(2);
            DateTime start = new DateTime(pd.getTime());
            DateTime end = new DateTime(log_date.getTime());
            Days d = Days.daysBetween(start, end);
            int diffDays = d.getDays();
            numDays.get(sldn).add(diffDays);
          }
        } catch (Exception e){
          if(log.isErrorEnabled()){
            log.error(e);
          }
        } finally {
          if(rs3 != null && !rs3.isClosed()){
            rs3.close();
          }
          formatter.close();
        }
      }

      // Per-domain novelty value: new IP count divided by the domain's
      // maximum prior-appearance age; grouped per cluster.  Domains with
      // no prior appearance are skipped.
      Hashtable<Integer, List<Float>> clusterValues = new Hashtable<Integer, List<Float>>();
      for (int clusterID : new_resolved_ips.keySet()) {
        clusterValues.put(clusterID, new ArrayList<Float>());
       
        Hashtable<String, Long> sldnValues = new_resolved_ips.get(clusterID);
        for(String sldn : sldnValues.keySet()){
          if(numDays.keySet().contains(sldn)){
            long newIPCount = sldnValues.get(sldn);
            float f = ((float)newIPCount)/Collections.max(numDays.get(sldn));
            clusterValues.get(clusterID).add(f);
           
          }
        }
      }
     
      // Cluster novelty = mean of its per-domain values; 0 when the
      // cluster contributed no per-domain values.
      for(int clusterID : clusterValues.keySet()){
        if(clusterValues.get(clusterID) == null){  //I dont think it is possible for this to ever be true
          retval.put(clusterID, null);
        } else {
          double sum = 0;
          for(double d : clusterValues.get(clusterID)){
            sum += d;
          }
          double val = 0;
          if(clusterValues.get(clusterID).size() > 0){
            val = sum/clusterValues.get(clusterID).size();
          }
          retval.put(clusterID, val);
        }
      }
    }
    return retval;
  }
 
  /**
   * Calculates the previous cluster ratio feature for each cluster generated
   * on a specific run date and within the a specific window
   *
   * @param log_date the run date
   * @param window the number of days previous to use in feature calculation
   * @return a table of results, the keys of the table are cluster ids and the
   *     values are lists of two elements.  The first element is the
   *     last_growth_ratio_prev_clusters value and the second element is the
   *     last_growth_prefix_ratio_prev_clusters value
   * @throws SQLException if there is and error calculating the feature
   */
  public Hashtable<Integer, List<Double>> calculatePrevClusterRatios
    (Date log_date, int window) throws SQLException{
    Hashtable<Integer, List<Double>> retval = new Hashtable<Integer, List<Double>>();
   
    ArrayList<Date> prevDates = getPrevDates(log_date, window);
    String query1 = properties.getProperty(PREVCLUSTER_QUERY1KEY);
    String query2 = properties.getProperty(PREVCLUSTER_QUERY2KEY);
    String logDateStr = df.format(log_date);
    String completequery = new String();
   
    StringBuffer addQueryBuff = new StringBuffer();
    for(int i = 0; i < prevDates.size(); i++){
      String prevDateStr = df.format(prevDates.get(i));
      StringBuffer querybuf = new StringBuffer();
      Formatter formatter = new Formatter(querybuf);
      formatter.format(query1, logDateStr, logDateStr,
          prevDateStr, prevDateStr, prevDateStr);
      addQueryBuff.append(querybuf.toString());
      if(i < prevDates.size() - 1){
        addQueryBuff.append(" UNION ");
      }
      formatter.close();
    }
   
    if(addQueryBuff.length() > 0){
      StringBuffer querybuf = new StringBuffer();
      Formatter formatter = new Formatter(querybuf);
      formatter.format(query2, logDateStr, logDateStr,
          addQueryBuff.toString());
      completequery = querybuf.toString();
      formatter.close();
    }
   
    if(completequery.length() > 0){
      ResultSet rs = null;
      try{
        rs = dbi.executeQueryWithResult(completequery);
        while(rs.next()){
          ArrayList<Double> temp = new ArrayList<Double>();
          temp.add(rs.getDouble(3));
          temp.add(rs.getDouble(4));
          retval.put(rs.getInt(1), temp);
        }
      } catch (Exception e){
        if(log.isErrorEnabled()){
          log.error(e);
        }
      } finally {
        if(rs != null && !rs.isClosed()){
          rs.close();
        }
      }
      Hashtable<Integer, Double> queryPerDomain = getQueriesPerDomain(log_date);
      for(Integer clusterid : retval.keySet()){
        List<Double> values = retval.get(clusterid);
        values.set(0, values.get(0)/queryPerDomain.get(clusterid));
        values.set(1, values.get(1)/queryPerDomain.get(clusterid));
      }
    }
   
    return retval;
  }
 
  /**
   * Gets run dates previous to a specific date within a window
   * of days from that date.
   *
   * @param log_date the run date
   * @param window the number of days previous to the current date
   * @return the list of previous run dates
   * @throws SQLException if there is an error retrieving the previous
   *     run dates
   */
  public ArrayList<Date> getPrevDates(Date log_date, int window) throws SQLException{
    ArrayList<Date> prevDates = new ArrayList<Date>();
    if(prevDateBufDate != null && prevDateBuf != null && prevDateBufDate.equals(log_date)
        && prevDateBufWindow >= window){
     
      //pull the dates within the day window from the prevDateBuf cache
      Date pd = null;
      int windowcount = 0;
      for(Date d : prevDateBuf){
        if(windowcount >= window){
          break;
        }
        if(pd == null){
          pd = d;
          windowcount++;
        } else {
          DateTime morerecent = new DateTime(d.getTime());
          DateTime lessrecent = new DateTime(pd.getTime());
          Days days = Days.daysBetween(morerecent, lessrecent);
          windowcount += days.getDays();
          pd = d;
        }
        prevDates.add(d);
      }
     
    } else {     
      String domainsprefix = properties.getProperty(DOMAINSPREFIXKEY);
      String resipsprefix = properties.getProperty(RESIPSPREFIXKEY);
     
      ArrayList<String> tablenames = new ArrayList<String>();
      ResultSet rs1 = null;
      try{
        rs1 = dbi.executeQueryWithResult(properties
            .getProperty(TABLES_QUERY1KEY));
        while (rs1.next()) {
          tablenames.add(rs1.getString(1));
        }
      } catch(Exception e){
        if(log.isErrorEnabled()){
          log.error(e);
        }
      } finally {
        if(rs1 != null && !rs1.isClosed()){
          rs1.close();
        }
      }
 
      GregorianCalendar cal = new GregorianCalendar();
      cal.setTime(log_date);
      for (int i = 0; i < window; i++) {
        cal.roll(Calendar.DAY_OF_YEAR, false);
        Date temp = cal.getTime();
        String datestr = df.format(temp);
        if (tablenames.contains(domainsprefix + "_" + datestr)
            && tablenames.contains(resipsprefix + "_" + datestr)) {
          prevDates.add(temp);
        }
      }
     
      //cache the values for later
      if(prevDateBuf == null){
        prevDateBuf = new ArrayList<Date>();
      } else {
        prevDateBuf.clear();
      }
      prevDateBuf.addAll(prevDates);
      prevDateBufDate = log_date;
      prevDateBufWindow = window;
    }
    return prevDates;
  }
 
 
  /**
   * Retrieves the number of dns queries per domain for each cluster
   * generated on a specific run date.
   *
   * @param log_date the run date
   * @return a table of values where the keys are cluster ids and the values
   *     are the queries per domain value
   * @throws SQLException if there is an error retrieving the queries
   *     per domain values
   */
  private Hashtable<Integer, Double> getQueriesPerDomain(Date log_date) throws SQLException{
    Hashtable<Integer, Double> retval = new Hashtable<Integer, Double>();
    StringBuffer querybuf = new StringBuffer();
    Formatter formatter = new Formatter(querybuf);
    formatter.format(properties.getProperty(PREVCLUSTER_QUERY3KEY), df.format(log_date));
    ResultSet rs = null;
    try{
      rs = dbi.executeQueryWithResult(querybuf.toString());
      while(rs.next()){
        retval.put(rs.getInt(1), rs.getDouble(2));
      }
    } catch(Exception e) {
      if(log.isErrorEnabled()){
        log.error(e);
      }
    } finally {
      if(rs != null && !rs.isClosed()){
        rs.close();
      }
      formatter.close();
    }
    return retval;
  }
 
  /**
   * Calculates the domains per network feature for each cluster generated
   * on a specific run date and stores them in the database.
   *
   * @param log_date the run date
   * @throws Exception if there is an error calculating or storing the
   *     feature values
   */
  public void updateDomainsPerNetwork(Date log_date) throws Exception{
    Map<Integer, Double> dpn =
        this.calculateDomainsPerNetwork(log_date,
            Integer.parseInt(properties.getProperty(DOMAINSPERNETWORK_WINDOWKEY)));
    for(int clusterid : dpn.keySet()){
      StringBuffer querybuf = new StringBuffer();
      Formatter formatter = new Formatter(querybuf);
      formatter.format(properties.getProperty(DOMAINSPERNETWORK_QUERY3KEY),
          df.format(log_date), dpn.get(clusterid).toString(), String.valueOf(clusterid));
      dbi.executeQueryNoResult(querybuf.toString());
      formatter.close();
    }
  }
 
  /**
   * Updates each cluster's longitudinal features for all clusters
   * generated during a specific run date.
   *
   * @param log_date the run date
   * @throws Exception if unable to calculate or store the longitudinal
   *     feature values
   */
  public void updateFeatures(Date log_date) throws Exception{
    String simplename = null;
    if(log.isInfoEnabled()){
      simplename = this.getClass().getSimpleName();
      log.info(simplename + " Started: "
          + Calendar.getInstance().getTime());
      log.info("Updating novelty features.");
    }
    dbi.initClusterTables(log_date);
    updateNoveltyFeature(log_date);
    if(log.isInfoEnabled()){
      log.info("Novelty features updated.");
      log.info("Updating previous cluster ratio features.");
    }
    updatePrevClusterRatios(log_date);
    if(log.isInfoEnabled()){
      log.info("Previous cluster ratio features updated.");
      log.info("Updating domains per network feature.");
    }
    updateDomainsPerNetwork(log_date);
    if(log.isInfoEnabled()){
      log.info("Domains per network feature updated.");
      log.info(simplename + " Finished: "
          + Calendar.getInstance().getTime());
    }
  }
 
  /**
   * Calculates the cluster novelty feature for each cluster generated
   * on a specific run date and stores them in the database.
   *
   * @param log_date the run date
   * @throws Exception if there is an error calculating or storing the feature
   *     values
   */
  public void updateNoveltyFeature(Date log_date) throws Exception{
    Map<Integer, String> windowvals = new TreeMap<Integer, String>();
    String[] windowsstr = properties.getProperty(NOVELTY_WINDOWSKEY).split(",");
    String[] windowfields = properties.getProperty(NOVELTY_WINFIELDSKEY).split(",");
   
    if(windowfields.length != windowsstr.length){
      throw new Exception("Number of novelty window values and fields do not match.");
    }
   
    for(int i = 0; i < windowsstr.length; i++){
      windowvals.put(Integer.parseInt(windowsstr[i]), windowfields[i]);
    }

    //We start from largest window to smallest so we can cache the prevDates results for later use
    List<Integer> windowkeys = new ArrayList<Integer>(windowvals.keySet());
    Collections.reverse(windowkeys);
   
    for(int window : windowkeys){
      Map<Integer, Double> novelty = calculateNoveltyFeature(log_date, window);
      for(int clusterid : novelty.keySet()){
        StringBuffer querybuf = new StringBuffer();
        Formatter formatter = new Formatter(querybuf);
        formatter.format(properties.getProperty(NOVELTY_QUERY3KEY),
            df.format(log_date), windowvals.get(window),
              String.valueOf(novelty.get(clusterid)),
              String.valueOf(clusterid), df.format(log_date));
        dbi.executeQueryNoResult(querybuf.toString());
        formatter.close();
      }
    }
   
   
  }
 

  /**
   * Calculates the previous cluster ratio feature for each cluster generated
   * on a specific run date and stores them in the database.
   *
   * @param log_date the run date
   * @throws SQLException if the feature values can not be stored in the database
   */
  public void updatePrevClusterRatios(Date log_date) throws SQLException{
    Hashtable<Integer, List<Double>> ratios =
        this.calculatePrevClusterRatios(log_date,
            Integer.parseInt(properties.getProperty(PREVCLUSTER_WINDOWKEY)));
    for(int clusterid : ratios.keySet()){
      List<Double> ratiovals = ratios.get(clusterid);
      StringBuffer querybuf = new StringBuffer();
      Formatter formatter = new Formatter(querybuf);
      formatter.format(properties.getProperty(PREVCLUSTER_QUERY4KEY),
          df.format(log_date), ratiovals.get(0).toString(),
          ratiovals.get(1).toString(), Integer.toString(clusterid));
      dbi.executeQueryNoResult(querybuf.toString());
      formatter.close();
    }
  }
}
// Related classes of edu.uga.cs.fluxbuster.features.FeatureCalculator
//
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a
// trademark of Sun Microsystems, Inc. and owned by Oracle Inc.
// Contact coftware#gmail.com.