Package org.apache.mahout.classifier

Examples of org.apache.mahout.classifier.ResultAnalyzer


    Collection<String> categories
      = new ArrayList<String>(catCount);
    for (int i=0; i < catCount; i++) {
      categories.add(categorizer.getCategory(i));
    }
    ResultAnalyzer resultAnalyzer = //<co id="tmx.results"/>
        new ResultAnalyzer(categories, "unknown");
    runTest(inputFiles, categorizer, tokenizer, resultAnalyzer); //<co id="tmx.run"/>
    /*<calloutlist>
    <callout arearefs="tmx.feature">Setup Feature Generators</callout>
    <callout arearefs="tmx.modelreader">Load Model</callout>
    <callout arearefs="tmx.categorizer">Create Categorizer</callout>
View Full Code Here


      String line = null;
      //<start id="lucene.examples.mlt.test"/>
      final ClassifierResult UNKNOWN = new ClassifierResult("unknown",
              1.0);
     
      ResultAnalyzer resultAnalyzer = //<co id="co.mlt.ra"/>
        new ResultAnalyzer(categorizer.getCategories(),
            UNKNOWN.getLabel());

      for (File ff: inputFiles) { //<co id="co.mlt.read"/>
        BufferedReader in =
            new BufferedReader(
                new InputStreamReader(
                    new FileInputStream(ff),
                    "UTF-8"));
        while ((line = in.readLine()) != null) {
          String[] parts = line.split("\t");
          if (parts.length != 2) {
            continue;
          }
         
          CategoryHits[] hits //<co id="co.mlt.cat"/>
            = categorizer.categorize(new StringReader(parts[1]));
          ClassifierResult result = hits.length > 0 ? hits[0] : UNKNOWN;
          resultAnalyzer.addInstance(parts[0], result); //<co id="co.mlt.an"/>
        }
       
        in.close();
      }

      System.out.println(resultAnalyzer.toString());//<co id="co.mlt.print"/>
      /*
      <calloutlist>
        <callout arearefs="co.mlt.ra">Create <classname>ResultAnalyzer</classname></callout>
        <callout arearefs="co.mlt.read">Read Test data</callout>
        <callout arearefs="co.mlt.cat">Categorize</callout>
View Full Code Here

    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: " + params.get("dataSource"));
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    TimingStatistics totalStatistics = new TimingStatistics();
    if (subdirs != null) {
     
      for (File file : subdirs) {
        if (verbose) {
          log.info("--------------");
          log.info("Testing: {}", file);
        }
        TimingStatistics operationStats = new TimingStatistics();
       
        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
            .get("encoding")), false)) {
         
          Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
              .generateNGrams();
          for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
            String correctLabel = stringListEntry.getKey();
            List<String> strings = stringListEntry.getValue();
            TimingStatistics.Call call = operationStats.newCall();
            TimingStatistics.Call outercall = totalStatistics.newCall();
            ClassifierResult classifiedLabel = classifier.classifyDocument(strings.toArray(new String[strings
                .size()]), params.get("defaultCat"));
            call.end();
            outercall.end();
            boolean correct = resultAnalyzer.addInstance(correctLabel, classifiedLabel);
            if (verbose) {
              // We have one document per line
              log.info("Line Number: {} Line(30): {} Expected Label: {} Classified Label: {} Correct: {}",
                new Object[] {lineNum, line.length() > 30 ? line.substring(0, 30) : line, correctLabel,
                              classifiedLabel.getLabel(), correct,});
            }
            // log.info("{} {}", correctLabel, classifiedLabel);
           
          }
          lineNum++;
        }
        /*
         * log.info("{}\t{}\t{}/{}", new Object[] {correctLabel,
         * resultAnalyzer.getConfusionMatrix().getAccuracy(correctLabel),
         * resultAnalyzer.getConfusionMatrix().getCorrect(correctLabel),
         * resultAnalyzer.getConfusionMatrix().getTotal(correctLabel)});
         */
        log.info("Classified instances from {}", file.getName());
        if (verbose) {
          log.info("Performance stats {}", operationStats.toString());
        }
      }
     
    }
    if (verbose) {
      log.info("{}", totalStatistics.toString());
    }
    log.info(resultAnalyzer.summarize());
  }
View Full Code Here

   
    Algorithm algorithm = new BayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
   
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
          .generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        if (i == j)
          assertEquals(4, matrix[i][j]);
        else
View Full Code Here

   
    Algorithm algorithm = new CBayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
          .generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        if (i == j)
          assertEquals(4, matrix[i][j]);
        else
View Full Code Here

    SequenceFileDirIterable<Text, VectorWritable> dirIterable =
        new SequenceFileDirIterable<Text, VectorWritable>(getOutputPath(),
                                                          PathType.LIST,
                                                          PathFilters.partFilter(),
                                                          getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
    analyzeResults(labelMap, dirIterable, analyzer);

    log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer);
    return 0;
  }
View Full Code Here

    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: " + params.get("dataSource"));
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    TimingStatistics totalStatistics = new TimingStatistics();
    if (subdirs != null) {
     
      for (File file : subdirs) {
        if (verbose) {
          log.info("--------------");
          log.info("Testing: {}", file);
        }
        TimingStatistics operationStats = new TimingStatistics();
       
        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
            .get("encoding")), false)) {
         
          Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
              .generateNGrams();
          for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
            String correctLabel = stringListEntry.getKey();
            List<String> strings = stringListEntry.getValue();
            TimingStatistics.Call call = operationStats.newCall();
            TimingStatistics.Call outercall = totalStatistics.newCall();
            ClassifierResult classifiedLabel = classifier.classifyDocument(strings.toArray(new String[strings
                .size()]), params.get("defaultCat"));
            call.end();
            outercall.end();
            boolean correct = resultAnalyzer.addInstance(correctLabel, classifiedLabel);
            if (verbose) {
              // We have one document per line
              log.info("Line Number: {} Line(30): {} Expected Label: {} Classified Label: {} Correct: {}",
                new Object[] {lineNum, line.length() > 30 ? line.substring(0, 30) : line, correctLabel,
                              classifiedLabel.getLabel(), correct,});
            }
            // log.info("{} {}", correctLabel, classifiedLabel);
           
          }
          lineNum++;
        }
        ConfusionMatrix matrix = resultAnalyzer.getConfusionMatrix();
        log.info("{}", matrix);
        BayesClassifierDriver.confusionMatrixSeqFileExport(params, matrix);

        log.info("ConfusionMatrix: {}", matrix.toString());
          
View Full Code Here

   
    Algorithm algorithm = new BayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
   
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], params.getGramSize()).generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        assertEquals(i == j ? 4 : 0, matrix[i][j]);
      }
    }
View Full Code Here

   
    Algorithm algorithm = new CBayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], params.getGramSize()).generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        assertEquals(i == j ? 4 : 0, matrix[i][j]);
      }
    }
View Full Code Here

        newsGroups.intern(newsgroup.getName());
        files.addAll(Arrays.asList(newsgroup.listFiles()));
      }
    }
    System.out.printf("%d test files\n", files.size());
    ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT");
    for (File file : files) {
      String ng = file.getParentFile().getName();

      int actual = newsGroups.intern(ng);
      NewsgroupHelper helper = new NewsgroupHelper();
      Vector input = helper.encodeFeatureVector(file, actual, 0, overallCounts);//no leak type ensures this is a normal vector
      Vector result = classifier.classifyFull(input);
      int cat = result.maxValueIndex();
      double score = result.maxValue();
      double ll = classifier.logLikelihood(actual, input);
      ClassifierResult cr = new ClassifierResult(newsGroups.values().get(cat), score, ll);
      ra.addInstance(newsGroups.values().get(actual), cr);

    }
    output.printf("%s\n\n", ra.toString());
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.ResultAnalyzer

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.