Package com.digitalpebble.classification

Examples of com.digitalpebble.classification.Lexicon


  public static void getAttributeScores(String modelPath, String lexiconF,
      int topAttributesNumber) throws IOException {
    // load the model + the lexicon
    // try to see if we can get a list of the best scores from the model
    // works only for liblinear
    Lexicon lexicon = new Lexicon(lexiconF);
    Model liblinearModel = Model.load(new File(modelPath));
    double[] weights = liblinearModel.getFeatureWeights();
    // dump all the weights
    int numClasses = liblinearModel.getNrClass();
    int numFeatures = liblinearModel.getNrFeature();

    Map<Integer, String> invertedAttributeIndex = lexicon
        .getInvertedIndex();

    Map<String, WeightedAttributeQueue> topAttributesPerLabel = new HashMap<String, WeightedAttributeQueue>(
        numClasses);

    for (int i = 0; i < weights.length; i++) {
      // get current class num
      int classNum = i / numFeatures;
      int featNum = i % numFeatures;
      String classLabel = lexicon.getLabel(classNum);
      String attLabel = invertedAttributeIndex.get(featNum + 1);

      // display the values between -0.001 and +0.001 as 0
      if (weights[i] < 0.001 && weights[i] > -0.001)
        weights[i] = 0;
View Full Code Here


    public static void dumpBestAttributes(String raw, String lexiconF)
            throws IOException {
        // load the corpus + the lexicon
        // load the lexicon and the raw file
        Lexicon lexicon = new Lexicon(lexiconF);
        FileTrainingCorpus corpus = new FileTrainingCorpus(new File(raw));
        AttributeScorer scorer = logLikelihoodAttributeScorer.getScorer(corpus,
                lexicon);
    }
View Full Code Here

                .getProperty("compact.attribute.nums"));

        String format = props.getProperty("format");

        // load the lexicon and the raw file
        Lexicon lexicon = new Lexicon(lexiconF);

        String weightingScheme = props.getProperty(
                "classification_weight_scheme", "tfidf");
        WeightingMethod method = WeightingMethod
                .methodFromString(weightingScheme);
        lexicon.setMethod(method);

        // get the raw file
        FileTrainingCorpus ftc = new FileTrainingCorpus(new File(raw));

        int keepNBestAttributes = Integer.parseInt(props.getProperty(
                "keepNBestAttributes", "-1"));

        if (keepNBestAttributes != -1) {
            // double scores[] = logLikelihoodAttributeFilter.getScores(ftc,
            // lexicon);
            // lexicon.setLogLikelihoodRatio(scores);
            // lexicon.keepTopNAttributesLLR(keepNBestAttributes);
            AttributeScorer scorer = logLikelihoodAttributeScorer.getScorer(
                    ftc, lexicon);
            lexicon.setAttributeScorer(scorer);
            lexicon.applyAttributeFilter(scorer, keepNBestAttributes);
        } else {
            // apply the filters on the Lexicon
            int minFreq = Integer.parseInt(props
                    .getProperty("classification_minFreq"));
            int maxFreq = Integer.MAX_VALUE;

            lexicon.pruneTermsDocFreq(minFreq, maxFreq);
        }

        // change the indices of the attributes to remove
        // gaps between them
        Map<Integer, Integer> equiv = null;
        if (compact) {
            // create a new Lexicon object
            equiv = lexicon.compact();
        }

        // save the modified lexicon file
        if (newLexicon != null)
            lexicon.saveToFile(newLexicon);

        // dump a new vector file
        Utils.writeExamples(ftc, lexicon, true, vector_location, equiv, format);
    }
View Full Code Here

  public static void getAttributeScores(String modelPath, String lexiconF,
      int topAttributesNumber) throws IOException {
    // load the model + the lexicon
    // try to see if we can get a list of the best scores from the model
    // works only for liblinear
    Lexicon lexicon = new Lexicon(lexiconF);
    Model liblinearModel = Model.load(new File(modelPath));
    double[] weights = liblinearModel.getFeatureWeights();
    // dump all the weights
    int numClasses = liblinearModel.getNrClass();
    int numFeatures = liblinearModel.getNrFeature();

    Map<Integer, String> invertedAttributeIndex = lexicon
        .getInvertedIndex();

    Map<String, WeightedAttributeQueue> topAttributesPerLabel = new HashMap<String, WeightedAttributeQueue>(
        numClasses);

    // for (int i = 0; i < nr_w; i++) {
    // double contrib = w[(idx - 1) * nr_w + i] * lx.getValue();
    // }
    //
    // idx 1 in class 1 -> 0 x 22 + 0 = 0
    // idx 2 in class 1 -> 1 x 22 + 0 = 22
    // idx 1 in class 2 -> 0 x 22 + 1 = 1
    // idx 2 in class 2 -> 1 x 22 + 1 = 23

    // initialise the queues
    if (topAttributesNumber != -1) {
      for (int classNum = 0; classNum < numClasses; classNum++) {
        String classLabel = lexicon.getLabel(classNum);
        WeightedAttributeQueue queue = new WeightedAttributeQueue(
            topAttributesNumber);
        topAttributesPerLabel.put(classLabel, queue);
      }
    }

    for (int classNum = 0; classNum < numClasses; classNum++) {
      String classLabel = lexicon.getLabel(classNum);
      WeightedAttributeQueue queue = topAttributesPerLabel
          .get(classLabel);
      for (int featNum = 0; featNum < numFeatures; featNum++) {
        int pos = featNum * numClasses + classNum;
        double featWeight = weights[pos];
View Full Code Here

  private boolean cross_validation = false;

  public LibSVMModelCreator(String lexicon_location, String model_location,
      String vectorFile) {
    lexicon = new Lexicon();
    this.model_file_name = model_location;
    this.lexiconLocation = lexicon_location;
    this.vector_location = vectorFile;
  }
View Full Code Here

  private String outputLearner;

  public LibLinearModelCreator(String lexicon_location,
      String model_location, String vector_location) {
    lexicon = new Lexicon();
    this.SVM_Model_location = model_location;
    this.lexiconLocation = lexicon_location;
    this.vector_location = vector_location;

    learner_filename = System.getProperty("liblinear_train",
View Full Code Here

TOP

Related Classes of com.digitalpebble.classification.Lexicon

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.