Package weka.associations.gsp

Source Code of weka.associations.gsp.Sequence

/*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
* Sequence.java
* Copyright (C) 2007 Sebastian Beer
*
*/

package weka.associations.gsp;

import weka.core.FastVector;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;

import java.io.Serializable;
import java.util.Enumeration;

/**
* Class representing a sequence of elements/itemsets.
*
* @author  Sebastian Beer
* @version $Revision: 1.2 $
*/
public class Sequence
  implements Cloneable, Serializable, RevisionHandler {

  /** for serialization */
  private static final long serialVersionUID = -5001018056339156390L;

  /** the support count of the Sequence */
  protected int m_SupportCount;
 
  /** ordered list of the comprised elements/itemsets */
  protected FastVector m_Elements;

  /**
   * Constructor.
   */
  public Sequence() {
    m_SupportCount = 0;
    m_Elements = new FastVector();
  }

  /**
   * Constructor accepting a set of elements as parameter.
   *
   * @param elements     the Elements of the Sequence
   */
  public Sequence(FastVector elements) {
    m_SupportCount = 0;
    m_Elements = elements;
  }

  /**
   * Constructor accepting an int value as parameter to set the support count.
   *
   * @param supportCount   the support count to set
   */
  public Sequence(int supportCount) {
    m_SupportCount = supportCount;
    m_Elements = new FastVector();
  }

  /**
   * Generates all possible candidate k-Sequences and prunes the ones that
   * contain an infrequent (k-1)-Sequence.
   *
   * @param kMinusOneSequences   the set of (k-1)-Sequences, used for verification
   * @return       the generated set of k-candidates
   * @throws CloneNotSupportedException
   */
  public static FastVector aprioriGen(FastVector kMinusOneSequences) throws CloneNotSupportedException {
    FastVector allCandidates = generateKCandidates(kMinusOneSequences);
    FastVector prunedCandidates = pruneCadidates(allCandidates, kMinusOneSequences);

    return prunedCandidates;
  }

  /**
   * Deletes Sequences of a given set which don't meet the minimum support
   * count threshold.
   *
   * @param sequences     the set Sequences to be checked
   * @param minSupportCount   the minimum support count
   * @return       the set of Sequences after deleting
   */
  public static FastVector deleteInfrequentSequences(FastVector sequences, long minSupportCount) {
    FastVector deletedSequences = new FastVector();
    Enumeration seqEnum = sequences.elements();

    while (seqEnum.hasMoreElements()) {
      Sequence currentSeq = (Sequence) seqEnum.nextElement();
      long curSupportCount = currentSeq.getSupportCount();

      if (curSupportCount >= minSupportCount) {
  deletedSequences.addElement(currentSeq);
      }
    }
    return deletedSequences;
  }

  /**
   * Generates candidate k-Sequences on the basis of a given (k-1)-Sequence set.
   *
   * @param kMinusOneSequences   the set of (k-1)-Sequences
   * @return       the set of candidate k-Sequences
   * @throws CloneNotSupportedException
   */
  protected static FastVector generateKCandidates(FastVector kMinusOneSequences) throws CloneNotSupportedException {
    FastVector candidates = new FastVector();
    FastVector mergeResult = new FastVector();

    for (int i = 0; i < kMinusOneSequences.size(); i++) {
      for (int j = 0; j < kMinusOneSequences.size(); j++) {
  Sequence originalSeq1 = (Sequence) kMinusOneSequences.elementAt(i);
  Sequence seq1 = originalSeq1.clone();
  Sequence originalSeq2 = (Sequence) kMinusOneSequences.elementAt(j);
  Sequence seq2 = originalSeq2.clone();
  Sequence subseq1 = seq1.deleteEvent("first");
  Sequence subseq2 = seq2.deleteEvent("last");

  if (subseq1.equals(subseq2)) {
    //seq1 and seq2 are 1-sequences
    if ((subseq1.getElements().size() == 0) && (subseq2.getElements().size() == 0)) {
      if (i >= j) {
        mergeResult = merge(seq1, seq2, true, true);
      } else {
        mergeResult = merge(seq1, seq2, true, false);
      }
      //seq1 and seq2 are k-sequences
    } else {
      mergeResult = merge(seq1, seq2, false, false);
    }
    candidates.appendElements(mergeResult);
  }
      }
    }
    return candidates;
  }

  /**
   * Merges two Sequences in the course of candidate generation. Differentiates
   * between merging 1-Sequences and k-Sequences, k > 1.
   *
   * @param seq1     Sequence at first position
   * @param seq2     Sequence at second position
   * @param oneElements   true, if 1-Elements should be merged, else false
   * @param mergeElements   true, if two 1-Elements were not already merged
   *         (regardless of their position), else false
   * @return       set of resulting Sequences
   */
  protected static FastVector merge(Sequence seq1, Sequence seq2, boolean oneElements, boolean mergeElements) {
    FastVector mergeResult = new FastVector();

    //merge 1-sequences
    if (oneElements) {
      Element element1 = (Element) seq1.getElements().firstElement();
      Element element2 = (Element) seq2.getElements().firstElement();
      Element element3 = null;
      if (mergeElements) {
  for (int i = 0; i < element1.getEvents().length; i++) {
    if (element1.getEvents()[i] > -1) {
      if (element2.getEvents()[i] > -1) {
        break;
      } else {
        element3 = Element.merge(element1, element2);
      }
    }
  }
      }
      FastVector newElements1 = new FastVector();
      //generate <{x}{y}>
      newElements1.addElement(element1);
      newElements1.addElement(element2);
      mergeResult.addElement(new Sequence(newElements1));
      //generate <{x,y}>
      if (element3 != null) {
  FastVector newElements2 = new FastVector();
  newElements2.addElement(element3);
  mergeResult.addElement(new Sequence(newElements2));
      }

      return mergeResult;
      //merge k-sequences, k > 1
    } else {
      Element lastElementSeq1 = (Element) seq1.getElements().lastElement();
      Element lastElementSeq2 = (Element) seq2.getElements().lastElement();
      Sequence resultSeq = new Sequence();
      FastVector resultSeqElements = resultSeq.getElements();

      //if last two events/items belong to the same element/itemset
      if (lastElementSeq2.containsOverOneEvent()) {
  for (int i = 0; i < (seq1.getElements().size()-1); i++) {
    resultSeqElements.addElement(seq1.getElements().elementAt(i));
  }
  resultSeqElements.addElement(Element.merge(lastElementSeq1, lastElementSeq2));
  mergeResult.addElement(resultSeq);

  return mergeResult;
  //if last two events/items belong to different elements/itemsets
      } else {
  for (int i = 0; i < (seq1.getElements().size()); i++) {
    resultSeqElements.addElement(seq1.getElements().elementAt(i));
  }
  resultSeqElements.addElement(lastElementSeq2);
  mergeResult.addElement(resultSeq);

  return mergeResult;
      }
    }
  }

  /**
   * Converts a set of 1-Elements into a set of 1-Sequences.
   *
   * @param elements     the set of 1-Elements
   * @return       the set of 1-Sequences
   */
  public static FastVector oneElementsToSequences(FastVector elements) {
    FastVector sequences = new FastVector();
    Enumeration elementEnum = elements.elements();

    while (elementEnum.hasMoreElements()) {
      Sequence seq = new Sequence();
      FastVector seqElements = seq.getElements();
      seqElements.addElement(elementEnum.nextElement());
      sequences.addElement(seq);
    }
    return sequences;
  }

  /**
   * Prints a set of Sequences as String output.
   *
   * @param setOfSequences  the set of sequences
   */
  public static void printSetOfSequences(FastVector setOfSequences) {
    Enumeration seqEnum = setOfSequences.elements();
    int i = 1;

    while(seqEnum.hasMoreElements()) {
      Sequence seq = (Sequence) seqEnum.nextElement();
      System.out.print("[" + i++ + "]" + " " + seq.toString());
    }
  }

  /**
   * Prunes a k-Sequence of a given candidate set if one of its (k-1)-Sequences
   * is infrequent.
   *
   * @param allCandidates   the set of all potential k-Sequences
   * @param kMinusOneSequences   the set of (k-1)-Sequences for verification
   * @return       the set of the pruned candidates
   */
  protected static FastVector pruneCadidates(FastVector allCandidates, FastVector kMinusOneSequences) {
    FastVector prunedCandidates = new FastVector();
    boolean isFrequent;
    //for each candidate
    for (int i = 0; i < allCandidates.size(); i++) {
      Sequence candidate = (Sequence) allCandidates.elementAt(i);
      isFrequent = true;
      FastVector canElements = candidate.getElements();
      //generate each possible (k-1)-sequence and verify if it's frequent
      for (int j = 0; j < canElements.size(); j++) {
  if(isFrequent) {
    Element origElement = (Element) canElements.elementAt(j);
    int[] origEvents = origElement.getEvents();

    for (int k = 0; k < origEvents.length; k++) {
      if (origEvents[k] > -1) {
        int helpEvent = origEvents[k];
        origEvents[k] = -1;

        if (origElement.isEmpty()) {
    canElements.removeElementAt(j);
    //check if the (k-1)-sequence is contained in the set of kMinusOneSequences
    int containedAt = kMinusOneSequences.indexOf(candidate);
    if (containedAt != -1) {
      origEvents[k] = helpEvent;
      canElements.insertElementAt(origElement, j);
      break;
    } else {
      isFrequent = false;
      break;
    }
        } else {
    //check if the (k-1)-sequence is contained in the set of kMinusOneSequences
    int containedAt = kMinusOneSequences.indexOf(candidate);
    if (containedAt != -1) {
      origEvents[k] = helpEvent;
      continue;
    } else {
      isFrequent = false;
      break;
    }
        }
      }
    }
  } else {
    break;
  }
      }
      if (isFrequent) {
  prunedCandidates.addElement(candidate);
      }
    }
    return prunedCandidates;
  }

  /**
   * Returns a String representation of a set of Sequences where the numeric
   * value of each event/item is represented by its respective nominal value.
   *
   * @param setOfSequences   the set of Sequences
   * @param dataSet     the corresponding data set containing the header
   *         information
   * @param filterAttributes  the attributes to filter out
   * @return       the String representation
   */
  public static String setOfSequencesToString(FastVector setOfSequences, Instances dataSet, FastVector filterAttributes) {
    StringBuffer resString = new StringBuffer();
    Enumeration SequencesEnum = setOfSequences.elements();
    int i = 1;
    boolean printSeq;

    while(SequencesEnum.hasMoreElements()) {
      Sequence seq = (Sequence) SequencesEnum.nextElement();
      Integer filterAttr = (Integer) filterAttributes.elementAt(0);
      printSeq = true;

      if (filterAttr.intValue() != -1) {
  for (int j=0; j < filterAttributes.size(); j++) {
    filterAttr = (Integer) filterAttributes.elementAt(j);
    FastVector seqElements = seq.getElements();

    if (printSeq) {
      for (int k=0; k < seqElements.size(); k++) {
        Element currentElement = (Element) seqElements.elementAt(k);
        int[] currentEvents = currentElement.getEvents();

        if (currentEvents[filterAttr.intValue()] != -1) {
    continue;
        } else {
    printSeq = false;
    break;
        }
      }
    }
  }
      }
      if (printSeq) {
  resString.append("[" + i++ + "]" + " " + seq.toNominalString(dataSet));
      }
    }
    return resString.toString();
  }

  /**
   * Updates the support count of a set of Sequence candidates according to a
   * given set of data sequences.
   *
   * @param candidates     the set of candidates
   * @param dataSequences   the set of data sequences
   */
  public static void updateSupportCount(FastVector candidates, FastVector dataSequences) {
    Enumeration canEnumeration = candidates.elements();

    while(canEnumeration.hasMoreElements()){
      Enumeration dataSeqEnumeration = dataSequences.elements();
      Sequence candidate = (Sequence) canEnumeration.nextElement();

      while(dataSeqEnumeration.hasMoreElements()) {
  Instances dataSequence = (Instances) dataSeqEnumeration.nextElement();

  if (candidate.isSubsequenceOf(dataSequence)) {
    candidate.setSupportCount(candidate.getSupportCount() + 1);
  }
      }
    }
  }

  /**
   * Returns a deep clone of a Sequence.
   *
   * @return     the cloned Sequence
   */
  public Sequence clone() {
    try {
      Sequence clone = (Sequence) super.clone();

      clone.setSupportCount(m_SupportCount);
      FastVector cloneElements = new FastVector(m_Elements.size());

      for (int i = 0; i < m_Elements.size(); i++) {
  Element helpElement = (Element) m_Elements.elementAt(i);
  cloneElements.addElement(helpElement.clone());
      }
      clone.setElements(cloneElements);

      return clone;
    } catch (CloneNotSupportedException exc) {
      exc.printStackTrace();
    }
    return null;
  }

  /**
   * Deletes either the first or the last event/item of a Sequence. If the
   * deleted event/item is the only value in the Element, it is removed, as well.
   *
   * @param position     the position of the event/item (first or last)
   * @return       the Sequence with either the first or the last
   *         event/item deleted
   */
  protected Sequence deleteEvent(String position) {
    Sequence cloneSeq = clone();

    if (position.equals("first")) {
      Element element = (Element) cloneSeq.getElements().firstElement();
      element.deleteEvent("first");
      if (element.isEmpty()) {
  cloneSeq.getElements().removeElementAt(0);
      }
      return cloneSeq;
    }
    if (position.equals("last")) {
      Element element = (Element) cloneSeq.getElements().lastElement();
      element.deleteEvent("last");
      if (element.isEmpty()) {
  cloneSeq.getElements().removeElementAt(m_Elements.size()-1);
      }
      return cloneSeq;
    }
    return null;
  }

  /**
   * Checks if two Sequences are equal.
   *
   * @return       true, if the two Sequences are equal, else false
   */
  public boolean equals(Object obj) {
    Sequence seq2 = (Sequence) obj;
    FastVector seq2Elements = seq2.getElements();

    for (int i = 0; i < m_Elements.size(); i++) {
      Element thisElement = (Element) m_Elements.elementAt(i);
      Element seq2Element = (Element) seq2Elements.elementAt(i);
      if (!thisElement.equals(seq2Element)) {
  return false;
      }
    }
    return true;
  }

  /**
   * Returns the Elements of the Sequence.
   *
   * @return       the Elements
   */
  protected FastVector getElements() {
    return m_Elements;
  }

  /**
   * Returns the support count of the Sequence.
   *
   * @return       the support count
   */
  protected int getSupportCount() {
    return m_SupportCount;
  }

  /**
   * Checks if the Sequence is subsequence of a given data sequence.
   *
   * @param dataSequence   the data sequence to verify against
   * @return       true, if the Sequnce is subsequence of the data
   *         sequence, else false
   */
  protected boolean isSubsequenceOf(Instances dataSequence) {
    FastVector elements = getElements();
    Enumeration elementEnum = elements.elements();
    Element curElement = (Element) elementEnum.nextElement();

    for (int i = 0; i < dataSequence.numInstances(); i++) {
      if (curElement.isContainedBy(dataSequence.instance(i))) {
  if (!elementEnum.hasMoreElements()) {
    return true;
  } else {
    curElement = (Element) elementEnum.nextElement();
    continue;
  }
      }
    }
    return false;
  }

  /**
   * Sets the Elements of the Sequence.
   *
   * @param elements     the Elements to set
   */
  protected void setElements(FastVector elements) {
    m_Elements = elements;
  }

  /**
   * Sets the support count of the Sequence.
   *
   * @param supportCount   the support count to set
   */
  protected void setSupportCount(int supportCount) {
    m_SupportCount = supportCount;
  }

  /**
   * Returns a String representation of a Sequences where the numeric value
   * of each event/item is represented by its respective nominal value.
   *
   * @param dataSet     the corresponding data set containing the header
   *         information
   * @return       the String representation
   */
  public String toNominalString(Instances dataSet) {
    String result = "";

    result += "<";

    for (int i = 0; i < m_Elements.size(); i++) {
      Element element = (Element) m_Elements.elementAt(i);
      result += element.toNominalString(dataSet);
    }
    result += "> (" + getSupportCount() + ")\n";

    return result;
  }

  /**
   * Returns a String representation of a Sequence.
   *
   * @return       the String representation
   */
  public String toString() {
    String result = "";

    result += "Sequence Output\n";
    result += "------------------------------\n";
    result += "Support Count: " + getSupportCount() + "\n";
    result += "contained elements/itemsets:\n";

    for (int i = 0; i < m_Elements.size(); i++) {
      Element element = (Element) m_Elements.elementAt(i);
      result += element.toString();
    }
    result += "\n\n";

    return result;
  }
 
  /**
   * Returns the revision string.
   *
   * @return    the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.2 $");
  }
}
TOP

Related Classes of weka.associations.gsp.Sequence

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.