Package edu.stanford.nlp.ie.crf

Source Code of edu.stanford.nlp.ie.crf.CRFClassifierEvaluator

package edu.stanford.nlp.ie.crf;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.optimization.CmdEvaluator;
import edu.stanford.nlp.stats.MultiClassChunkEvalStats;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Triple;

import java.io.*;
import java.util.Collection;
import java.util.List;

/**
* Evaluates CRFClassifier on a set of data
* - called by QNMinimizer periodically
* - If evalCmd is set, runs command line specified by evalCmd
*                      otherwise does evaluation internally
*   NOTE: when running conlleval with exec on Linux, linux will first
*          fork process by duplicating memory of current process.  So if
*          JVM has lots of memory, it will all be duplicated when
*          child process is initially forked.
* @author Angel Chang
*/
public class CRFClassifierEvaluator<IN extends CoreMap> extends CmdEvaluator {

  private CRFClassifier<IN> classifier;
  // NOTE: Default uses -r, specify without -r if IOB
  private String cmdStr = "/u/nlp/bin/conlleval -r";
  private String[] cmd;

  // TODO: Use data structure to hold data + features
  // Cache already featurized documents
  // Original object bank
  Collection<List<IN>> data;
  // Featurized data
  List<Triple<int[][][], int[], double[][][]>> featurizedData;

  public CRFClassifierEvaluator(String description,
                                CRFClassifier<IN> classifier,
                                Collection<List<IN>> data,
                                List<Triple<int[][][], int[], double[][][]>> featurizedData)
  {
    this.description = description;
    this.classifier = classifier;
    this.data = data;
    this.featurizedData = featurizedData;
    cmd = getCmd(cmdStr);
    saveOutput = true;
  }

  public CRFClassifierEvaluator(String description,
                                CRFClassifier<IN> classifier)
  {
    this.description = description;
    this.classifier = classifier;
    saveOutput = true;
  }

  /**
   * Set the data to test on
   */
  public void setTestData(Collection<List<IN>> data, List<Triple<int[][][], int[], double[][][]>> featurizedData)
  {
    this.data = data;
    this.featurizedData = featurizedData;
  }

  /**
   * Set the evaluation command (set to null to skip evaluation using command line)
   * @param evalCmd
   */
  public void setEvalCmd(String evalCmd)
  {
    System.err.println("setEvalCmd to " + evalCmd);
    this.cmdStr = evalCmd;
    if (cmdStr != null) {
      cmdStr = cmdStr.trim();
      if (cmdStr.length() == 0) { cmdStr = null; }
    }
    cmd = getCmd(cmdStr);
  }

  public void setValues(double[] x)
  {
    classifier.updateWeightsForTest(x);
  }

  public String[] getCmd()
  {
    return cmd;
  }

  private double interpretCmdOutput() {
    String output = getOutput();
    String[] parts = output.split("\\s+");
    int fScoreIndex = 0;
    for (; fScoreIndex < parts.length; fScoreIndex++)
      if (parts[fScoreIndex].equals("FB1:"))
        break;
    fScoreIndex += 1;
    if (fScoreIndex < parts.length)
      return Double.parseDouble(parts[fScoreIndex]);
    else {
      System.err.println("ERROR in CRFClassifierEvaluator.interpretCmdOutput(), cannot find FB1 score in output:\n"+output);
      return -1;
    }
  }

  @Override
  public void outputToCmd(OutputStream outputStream)
  {
    try {
      PrintWriter pw = IOUtils.encodedOutputStreamPrintWriter(outputStream, null, true);
      classifier.classifyAndWriteAnswers(data, featurizedData, pw,
                                         classifier.makeReaderAndWriter());
    } catch (IOException ex) {
      throw new RuntimeIOException(ex);
    }
  }

  public double evaluate(double[] x) {
    double score = 0;
    setValues(x);
    if (getCmd() != null) {
      evaluateCmd(getCmd());
      score = interpretCmdOutput();
    } else {
      try {
        // TODO: Classify in memory instead of writing to tmp file
        File f = File.createTempFile("CRFClassifierEvaluator","txt");
        f.deleteOnExit();
        OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(f));
        PrintWriter pw = IOUtils.encodedOutputStreamPrintWriter(outputStream, null, true);
        classifier.classifyAndWriteAnswers(data, featurizedData, pw,
                                           classifier.makeReaderAndWriter());
        outputStream.close();
        BufferedReader br = new BufferedReader(new FileReader(f));
        MultiClassChunkEvalStats stats = new MultiClassChunkEvalStats("O");
        score = stats.score(br, "\t");
        System.err.println(stats.getConllEvalString());
        f.delete();
      } catch (Exception ex) {
        throw new RuntimeException(ex);
      }
    }
    return score;
  }

}
TOP

Related Classes of edu.stanford.nlp.ie.crf.CRFClassifierEvaluator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.