Package uk.ac.cam.ch.wwmm.ptclib.misc

Examples of uk.ac.cam.ch.wwmm.ptclib.misc.ClassificationEvaluator


          if(st2 == null) st2 = "";
         
          if(t2.equals("CM")) st2 = "EXACT";
         
          if(!evals.containsKey(t1)) {
            evals.put(t1, new ClassificationEvaluator());
          }
          evals.get(t1).logEvent(st1, st2);
         
          if(!pevals.containsKey(t1)) {
            pevals.put(t1, new ClassificationEvaluator());
          }
          pevals.get(t1).logEvent(st1, st2);
         
          if(!st1.equals(st2)) {
            int start = Integer.parseInt(e1.getAttributeValue("xtspanstart"));
View Full Code Here


    }
    if(events.size() == 1) events.add(events.get(0));
    DataIndexer di = new TwoPassDataIndexer(new EventCollectorAsStream(new SimpleEventCollector(events)), 3);
    GISModel gm = GIS.trainModel(100, di);
   
    ClassificationEvaluator ce = new ClassificationEvaluator();

    List<Double> chemList = new ArrayList<Double>();
    List<Double> engList = new ArrayList<Double>();

    for(String word : testSet) {
      Collection<String> f = overallFeatures.get(word);
      String type = chemFeatures.containsKey(word) ? "CHEM" : "NONCHEM";
     
      double [] results = gm.eval(f.toArray(new String[0]));
      System.out.println(word + "\t" + gm.getAllOutcomes(results));
      ce.logEvent(type, gm.getBestOutcome(results));
      if(!gm.getBestOutcome(results).equals(type)) System.out.println("*");
      if(type.equals("CHEM")) {
        chemList.add(results[gm.getIndex("CHEM")]);
      } else {
        engList.add(results[gm.getIndex("CHEM")]);
      }

    }
    ce.pprintPrecisionRecallEval();
    ce.pprintConfusionMatrix();
   
    for(String word : unknownFeatures.keySet()) {
      Collection<String> f = unknownFeatures.get(word);
      double [] results = gm.eval(f.toArray(new String[0]));
      System.out.println(word + "\t" + results[gm.getIndex("CHEM")] + "\t" + NGramBuilder.getInstance().testWordProb(word) + "\t" + NGram.getInstance().testWordSuffixProb(word));     
    }
   
    /*Bag<String> chemWords = getBag(root.getFirstChildElement("chemical"));
    Bag<String> nonChemWords = getBag(root.getFirstChildElement("nonchemical"));
    Bag<String> words = new Bag<String>();
    words.add(chemWords);
    words.add(nonChemWords);
    words.discardInfrequent(5);
   
   
    double overallRatio = chemWords.size() * 1.0 / nonChemWords.size();
   
    Map<String,Double> wbg = new HashMap<String,Double>();
   
    for(String word : words.getList()) {
      int chemcount = chemWords.getCount(word);
      int nonchemcount = nonChemWords.getCount(word);
      int totalcount = chemcount + nonchemcount;
      double ratio = chemcount * 1.0 / nonchemcount;
      double score = ratio / overallRatio;

      double g = 0;
      if(chemcount > 0) g += 2 * (chemcount * Math.log(chemcount / (totalcount * overallRatio)));
      if(nonchemcount > 0) g += 2 * (nonchemcount * Math.log(nonchemcount / (totalcount *(1.0-overallRatio))));
      //wbg.put(word, g);
      wbg.put(word, score);
      //if(score < 0.5 && word.contains(" ")) System.out.println(word + "\t" + chemWords.getCount(word) + "\t" + nonChemWords.getCount(word) + "\t" + score);
    }
    for(String word : StringTools.getSortedList(wbg)) {
      System.out.println(word + "\t" + chemWords.getCount(word) + "\t" + nonChemWords.getCount(word) + "\t" + wbg.get(word));
    }*/
    Collections.sort(chemList, Collections.reverseOrder());
    Collections.sort(engList, Collections.reverseOrder());
    int chemCount = 0;
    int engCount = 0;
    double map = 0.0;
    while(chemCount < chemList.size() && engCount < engList.size()) {
      if(engList.get(engCount) >= chemList.get(chemCount)) {
        engCount++;
      } else {
        chemCount++;
        double precision = chemCount * 1.0 / (chemCount + engCount);
        map += precision / chemList.size();
        if(true) {
          System.out.println((chemCount * 1.0 / chemList.size()) + "\t" + precision);
        }
      }
    }
    System.out.println(map);
   
   
    if(false) {
      ClassificationEvaluator ce2 = new ClassificationEvaluator();
      for(String word : testSet) {
        String bestWord = null;
        double bestScore = 0.0;
        for(String testWord : trainSet) {
          double score = jaccard(overallFeatures.get(word), overallFeatures.get(testWord));
          if(score > bestScore) {
            bestScore = score;
            bestWord = testWord;
          }
        }
        if(bestScore < 0.12) continue;
        String type = chemFeatures.containsKey(word) ? "CHEM" : "NONCHEM";
        String testType = chemFeatures.containsKey(bestWord) ? "CHEM" : "NONCHEM";
        System.out.println(word + "\t" + bestWord);
        if(!type.equals(testType)) {
          System.out.println("*");
        }
       
        ce2.logEvent(type, testType);
      }
      ce2.pprintPrecisionRecallEval();     
    }
  }
View Full Code Here

        if(overallFCs.getCount(f) < minOccur) be.getFeatures().remove(f);
      }
    }

   
    ClassificationEvaluator ce = new ClassificationEvaluator();
 
    if(true) {
      DecisionTree dt = new DecisionTree(bagEvents);
      dt.printTree();
      for(int i=0;i<testBagEvents.size();i++) {
        BagEvent be = testBagEvents.get(i);
        String result = dt.testBag(be.getFeatures());
        ce.logEvent(be.getClassLabel(), result);
      }
      System.out.println(ce.getAccuracy());
      System.out.println(ce.getKappa());     
      ce.pprintConfusionMatrix();
      ce.pprintPrecisionRecallEval();
      //return;
    }
   
    if(true) {
      ce = new ClassificationEvaluator();
      DecisionList dl = new DecisionList(bagEvents);
      for(int i=0;i<testBagEvents.size();i++) {
        BagEvent be = testBagEvents.get(i);
        String result = dl.testBag(be.getFeatures());
        ce.logEvent(be.getClassLabel(), result);
      }
      System.out.println(ce.getAccuracy());
      System.out.println(ce.getKappa());     
      ce.pprintConfusionMatrix();
      ce.pprintPrecisionRecallEval();
      //return;
    }
   
    ce = new ClassificationEvaluator();
    MultinomialNaiveBayes mnb = new MultinomialNaiveBayes(bagEvents);
    for(int i=0;i<testBagEvents.size();i++) {
      BagEvent be = testBagEvents.get(i);
      //Map<String,Double> results = mnb.testBag(be.getClassLabel(), be.getFeatures());
      Map<String,Double> results = mnb.testBag(be.getFeatures());
      System.out.println(be.getClassLabel() + "\t" + mnb.testBag(be.getFeatures()));
      ce.logEvent(be.getClassLabel(), mnb.bestResult(results));
    }
    System.out.println(ce.getAccuracy());
    System.out.println(ce.getKappa());     
    ce.pprintConfusionMatrix();
    ce.pprintPrecisionRecallEval();

    ce = new ClassificationEvaluator();
    List<Event> trainEvents = new ArrayList<Event>();
    List<Event> testEvents = new ArrayList<Event>();
    for(BagEvent be : bagEvents) {
      trainEvents.add(new Event(be.getClassLabel(), be.getFeatures().getSet().toArray(new String[0])));
    }
    for(BagEvent be : testBagEvents) {
      testEvents.add(new Event(be.getClassLabel(), be.getFeatures().getSet().toArray(new String[0])));
    }
    DataIndexer di = new TwoPassDataIndexer(new EventCollectorAsStream(new SimpleEventCollector(trainEvents)), 1);
    GISModel gm = GIS.trainModel(100, di);
   
    //ClassificationEvaluator ce = new ClassificationEvaluator();
   
    for(Event event : testEvents) {
      double [] results = gm.eval(event.getContext());
      String result = results[gm.getIndex("TRUE")] > 0.5 ? "TRUE" : "FALSE";
      //String result = gm.getBestOutcome(results);
      //System.out.println(event.getOutcome() + "\t" + result + "\t" + results[gm.getIndex(event.getOutcome())] + "\t" + StringTools.arrayToList(event.getContext()));
      ce.logEvent(event.getOutcome(), result);
    }
    System.out.println(ce.getAccuracy());
    System.out.println(ce.getKappa());     
    ce.pprintConfusionMatrix();
    ce.pprintPrecisionRecallEval();
   
    if(false) {
     
      List<Bag<String>> trueBags = new ArrayList<Bag<String>>();
      List<Bag<String>> falseBags = new ArrayList<Bag<String>>();
View Full Code Here

      nccount = ncs.length;
    }
    double cp = ccount / (0.0 + ccount + nccount);
    double ncp = 1.0 - cp;
   
    ClassificationEvaluator ce = new ClassificationEvaluator();
    for(int j=0;j<500;j++) {
      double thiscp = Math.log(cp);
      double thisncp = Math.log(ncp);
      for(int i=0;i<values;i++) {
        thiscp += Math.log(pdf(ut.get(i,j), cmeans.get(i), cstdevs.get(i)));
        thisncp += Math.log(pdf(ut.get(i,j), ncmeans.get(i), ncstdevs.get(i)));
      }
      String refClass = chemSet.contains(terms.get(j)) ? "CHEM" : "NONCHEM";
      String respClass = thiscp > thisncp ? "CHEM" : "NONCHEM";
      System.out.println(terms.get(j) + "\t" + (thiscp - thisncp));
      ce.logEvent(refClass, respClass);
    }
    System.out.println(ce.getAccuracy());
    System.out.println(ce.getKappa());
    ce.pprintConfusionMatrix();
    ce.pprintPrecisionRecallEval();
  }
View Full Code Here

        eventBags.add(be);
      }
    }
       
    if(false) {
      ClassificationEvaluator ce = new ClassificationEvaluator();

      MultinomialNaiveBayes mnb = new MultinomialNaiveBayes(eventBags);
      for(int i=0;i<eventBags.size();i++) {
        BagEvent be = eventBags.get(i);
      //for(BagEvent be : eventBags) {
        Map<String,Double> results = mnb.testBag(be.getClassLabel(), be.getFeatures());
        System.out.println(be.getClassLabel() + "\t" + mnb.testBag(be.getFeatures()));
        ce.logEvent(be.getClassLabel(), mnb.bestResult(results));
        String rf = "MNB:" + mnb.bestResult(results);
        Event e = events.get(i);
        String [] sa = new String[e.getContext().length + 1];
        for(int j=0;j<e.getContext().length;j++) {
          sa[j] = e.getContext()[j];
        }
        sa[e.getContext().length] = rf;
        events.set(i, new Event(e.getOutcome(), sa));
      }
      System.out.println(ce.getAccuracy());
      System.out.println(ce.getKappa());     
      ce.pprintConfusionMatrix();
      ce.pprintPrecisionRecallEval();
      //return;
    }

   
    int seed = 5;
    //for(int seed=0;seed<10;seed++) {
      Collections.shuffle(events, new Random(seed));
     
      List<Event> trainData = events.subList(0, events.size()/2);
      //trainData = new FeatureSelector().selectFeatures(trainData, 200.0);
      List<Event> testData = events.subList(events.size()/2, events.size());
     
      if(trainData.size() == 1) trainData.add(trainData.get(0));
      DataIndexer di = new TwoPassDataIndexer(new EventCollectorAsStream(new SimpleEventCollector(trainData)), 1);
      GISModel gm = GIS.trainModel(100, di);
     
      ClassificationEvaluator ce = new ClassificationEvaluator();
     
      for(Event event : testData) {
        double [] results = gm.eval(event.getContext());
        String result = results[gm.getIndex("TRUE")] > 0.5 ? "TRUE" : "FALSE";
        //String result = gm.getBestOutcome(results);
        //System.out.println(event.getOutcome() + "\t" + result + "\t" + results[gm.getIndex(event.getOutcome())] + "\t" + StringTools.arrayToList(event.getContext()));
        ce.logEvent(event.getOutcome(), result);
      }
      System.out.println("seed: " + seed);
      System.out.println(ce.getAccuracy());
      System.out.println(ce.getKappa());     
    //}
    ce.pprintConfusionMatrix();
    ce.pprintPrecisionRecallEval();

  }
View Full Code Here

 
  private void simpleEval(State state, final Map<String,ClassificationEvaluator> evaluations) throws Exception {
    NESubtypeHandler h = new NESubtypeHandler() {
      public void handle(Element annot, String type, String subtype, java.util.List<String> features) {
        if(!classifiers.containsKey(type)) return;
        if(!evaluations.containsKey(type)) evaluations.put(type, new ClassificationEvaluator());
        double [] outcomes = classifiers.get(type).eval(features.toArray(new String[0]));       
        String outcome = classifiers.get(type).getBestOutcome(outcomes);
        //subtype = flattenSubtype(subtype);
        //double prob = outcomes[classifiers.get(type).getIndex(outcome)];
        if(!subtype.equals(outcome)) {
View Full Code Here

        if(overallFCs.getCount(f) < minOccur) be.getFeatures().remove(f);
      }
    }

   
    ClassificationEvaluator ce = new ClassificationEvaluator();
 
    if(false) {
      ce = new ClassificationEvaluator();
      DecisionTree dt = new DecisionTree(bagEvents);
      dt.printTree();
      for(int i=0;i<testBagEvents.size();i++) {
        BagEvent be = testBagEvents.get(i);
        String result = dt.testBag(be.getFeatures());
        ce.logEvent(be.getClassLabel(), result);
      }
      System.out.println(ce.getAccuracy());
      System.out.println(ce.getKappa());     
      ce.pprintConfusionMatrix();
      ce.pprintPrecisionRecallEval();
      //return;
    }
   
    if(false) {
      ce = new ClassificationEvaluator();
      DecisionList dl = new DecisionList(bagEvents);
      for(int i=0;i<testBagEvents.size();i++) {
        BagEvent be = testBagEvents.get(i);
        String result = dl.testBag(be.getFeatures());
        ce.logEvent(be.getClassLabel(), result);
      }
      System.out.println(ce.getAccuracy());
      System.out.println(ce.getKappa());     
      ce.pprintConfusionMatrix();
      ce.pprintPrecisionRecallEval();
      //return;
    }
   
    if(true) {
      ce = new ClassificationEvaluator();
      MultinomialNaiveBayes mnb = new MultinomialNaiveBayes(bagEvents);
     
      Element elem = mnb.toXML();
      Document doc = new Document(elem);
      Serializer ser = new Serializer(System.out);
      //ser.setIndent(2);
      ser.write(doc);
      mnb = new MultinomialNaiveBayes(elem);
      elem = mnb.toXML();
      doc = new Document(elem);
      ser = new Serializer(System.out);
      //ser.setIndent(2);
      ser.write(doc);
     
      for(int i=0;i<testBagEvents.size();i++) {
        BagEvent be = testBagEvents.get(i);
        //Map<String,Double> results = mnb.testBag(be.getClassLabel(), be.getFeatures());
        Map<String,Double> results = mnb.testBag(be.getFeatures());
        System.out.println(be.getClassLabel() + "\t" + mnb.testBag(be.getFeatures()));
        ce.logEvent(be.getClassLabel(), mnb.bestResult(results));
        if(!be.getClassLabel().equals(mnb.bestResult(results))) {
          System.out.println(be.getFeatures());
          System.out.println(bagsToSentences.get(be.getFeatures()));
        }
      }
      System.out.println(ce.getAccuracy());
      System.out.println(ce.getKappa());     
      ce.pprintConfusionMatrix();
      ce.pprintPrecisionRecallEval();
    }

    if(false) {
      ce = new ClassificationEvaluator();
      List<Event> trainEvents = new ArrayList<Event>();
      List<Event> testEvents = new ArrayList<Event>();
      for(BagEvent be : bagEvents) {
        trainEvents.add(new Event(be.getClassLabel(), be.getFeatures().getSet().toArray(new String[0])));
      }
      for(BagEvent be : testBagEvents) {
        testEvents.add(new Event(be.getClassLabel(), be.getFeatures().getSet().toArray(new String[0])));
      }
      DataIndexer di = new TwoPassDataIndexer(new EventCollectorAsStream(new SimpleEventCollector(trainEvents)), 1);
      GISModel gm = GIS.trainModel(100, di);
     
      //ClassificationEvaluator ce = new ClassificationEvaluator();
     
      for(Event event : testEvents) {
        double [] results = gm.eval(event.getContext());
        String result = results[gm.getIndex("TRUE")] > 0.5 ? "TRUE" : "FALSE";
        //String result = gm.getBestOutcome(results);
        //System.out.println(event.getOutcome() + "\t" + result + "\t" + results[gm.getIndex(event.getOutcome())] + "\t" + StringTools.arrayToList(event.getContext()));
        ce.logEvent(event.getOutcome(), result);
      }
      System.out.println(ce.getAccuracy());
      System.out.println(ce.getKappa());     
      ce.pprintConfusionMatrix();
      ce.pprintPrecisionRecallEval();     
    }
   
    if(false) {
     
      List<Bag<String>> trueBags = new ArrayList<Bag<String>>();
View Full Code Here

          if(st2 == null) st2 = "";
         
          //if(t2.equals("CM")) st2 = "EXACT";
         
          if(!evals.containsKey(t1)) {
            evals.put(t1, new ClassificationEvaluator());
          }
          evals.get(t1).logEvent(st1, st2);
         
          if(!pevals.containsKey(t1)) {
            pevals.put(t1, new ClassificationEvaluator());
          }
          pevals.get(t1).logEvent(st1, st2);
         
          if(!st1.equals(st2)) {
            int start = Integer.parseInt(e1.getAttributeValue("xtspanstart"));
View Full Code Here

   
    if(trainData.size() == 1) trainData.add(trainData.get(0));
    DataIndexer di = new TwoPassDataIndexer(new EventCollectorAsStream(new SimpleEventCollector(trainData)), 1);
    GISModel gm = GIS.trainModel(100, di);
   
    ClassificationEvaluator ce = new ClassificationEvaluator();
   
    for(Event event : testData) {
      double [] results = gm.eval(event.getContext());
      String result = gm.getBestOutcome(results);
      System.out.println(event.getOutcome() + "\t" + result + "\t" + results[gm.getIndex(event.getOutcome())] + "\t" + StringTools.arrayToList(event.getContext()));
      ce.logEvent(event.getOutcome(), result);
    }
    System.out.println(ce.getAccuracy());
    System.out.println(ce.getKappa());
    ce.pprintConfusionMatrix();
    ce.pprintPrecisionRecallEval();

   
  }
View Full Code Here

          if(st2 == null) st2 = "";
         
          //if(t2.equals("CM")) st2 = "EXACT";
         
          if(!evals.containsKey(t1)) {
            evals.put(t1, new ClassificationEvaluator());
          }
          evals.get(t1).logEvent(st1, st2);
         
          if(!pevals.containsKey(t1)) {
            pevals.put(t1, new ClassificationEvaluator());
          }
          pevals.get(t1).logEvent(st1, st2);
         
          /*if(!st1.equals(st2)) {
            int start = Integer.parseInt(e1.getAttributeValue("xtspanstart"));
View Full Code Here

TOP

Related Classes of uk.ac.cam.ch.wwmm.ptclib.misc.ClassificationEvaluator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.