Package com.digitalpebble.classification.test

Source Code of com.digitalpebble.classification.test.TestMultiFieldDocs

/**
* Copyright 2009 DigitalPebble Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package com.digitalpebble.classification.test;

import java.util.Map;

import com.digitalpebble.classification.Document;
import com.digitalpebble.classification.Field;
import com.digitalpebble.classification.Parameters;
import com.digitalpebble.classification.RAMTrainingCorpus;
import com.digitalpebble.classification.TextClassifier;
import com.digitalpebble.classification.Vector;

public class TestMultiFieldDocs extends AbstractLearnerTest {

  public void testMultiField() throws Exception {
    Field[] fields = new Field[3];
    fields[0] = new Field("title", new String[] { "This", "is", "a",
        "title" });
    fields[1] = new Field("abstract", new String[] { "abstract" });
    fields[2] = new Field("content", new String[] { "This", "is", "the",
        "content", "this", "will", "have", "a", "large", "value" });
    learner.setMethod(Parameters.WeightingMethod.TFIDF);
    Document doc = learner.createDocument(fields, "large");

    Field[] fields2 = new Field[2];
    fields2[0] = new Field("title", new String[] { "This", "is", "not",
        "a", "title" });
    // fields2[1] = new Field("abstract", new String[]{"abstract"});
    fields2[1] = new Field("content", new String[] { "This", "is", "the",
        "content", "this", "will", "have", "a", "small", "value" });
    learner.setMethod(Parameters.WeightingMethod.TFIDF);
    Document doc2 = learner.createDocument(fields2, "small");

    // try putting the same field several times
    Field[] fields3 = new Field[3];
    fields3[0] = new Field("title", new String[] { "This", "is", "not",
        "a", "title" });
    // fields2[1] = new Field("abstract", new String[]{"abstract"});
    fields3[1] = new Field("content", new String[] { "This", "is", "the",
        "content", "this", "will", "have", "a", "small", "value" });
    fields3[2] = new Field("title", new String[] { "some", "different",
        "content" });
    learner.setMethod(Parameters.WeightingMethod.TFIDF);
    Document doc3 = learner.createDocument(fields3, "small");

    RAMTrainingCorpus corpus = new RAMTrainingCorpus();
    corpus.add(doc);
    corpus.add(doc2);
    learner.learn(corpus);

    TextClassifier classi = TextClassifier.getClassifier(tempFile);
    double[] scores = classi.classify(doc);
    assertEquals("large", classi.getBestLabel(scores));
    scores = classi.classify(doc2);
    assertEquals("small", classi.getBestLabel(scores));
    scores = classi.classify(doc3);
    assertEquals("small", classi.getBestLabel(scores));
  }

  public void testCustomWeightingScheme() throws Exception {
    Field[] fields = new Field[1];
    fields[0] = new Field("keywords", new String[] { "test","keywords"});
    learner.setMethod(Parameters.WeightingMethod.FREQUENCY);
    learner.getLexicon().setMethod(Parameters.WeightingMethod.BOOLEAN, "keywords");
    Document doc = learner.createDocument(fields, "large");
    Vector vector = doc.getFeatureVector(learner.getLexicon());
   
    // check that the values for the field keywords are boolean
    int[] indices = vector.getIndices();
    double[] values = vector.getValues();
   
    Map<Integer, String> invertedIndex = learner.getLexicon()
    .getInvertedIndex();
   
    for (int i = 0; i < indices.length; i++) {
      // retrieve the corresponding entry in the lexicon
      String label = invertedIndex.get(indices[i]);
      double expected = 1.0;
      assertEquals("label: "+label,expected, values[i]);
    }
  }

}
TOP

Related Classes of com.digitalpebble.classification.test.TestMultiFieldDocs

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.