Package edu.stanford.nlp.parser.charniak

Source Code of edu.stanford.nlp.parser.charniak.CharniakScoredParsesReaderWriter$ScoredParsesIterator

package edu.stanford.nlp.parser.charniak;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.logging.Logger;
import java.util.regex.Pattern;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.StringOutputStream;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Trees;
import edu.stanford.nlp.util.AbstractIterator;
import edu.stanford.nlp.util.IterableIterator;
import edu.stanford.nlp.util.ScoredObject;
import edu.stanford.nlp.util.Timing;

/**
* Utility routines for printing/reading scored parses for the Charniak Parser
*
* @author Angel Chang
*/
public class CharniakScoredParsesReaderWriter {
  private final static Logger logger = Logger.getLogger(CharniakScoredParsesReaderWriter.class.getName());

  private final static Pattern wsDelimiter = Pattern.compile("\\s+");

  /**
   * Reads scored parses from the charniak parser
   *
   * File format of the scored parses
   * <# of parses>\t<sentenceid>
   * <score>
   * <parse>
   * <score>
   * <parse>
   * ...
   *
   * @param filename  - File to read parses from
   * @return iterable with list of scored parse trees
   */
  public Iterable<List<ScoredObject<Tree>>> readScoredTrees(String filename)
  {
    try {
      ScoredParsesIterator iter = new ScoredParsesIterator(filename);
      return new IterableIterator<List<ScoredObject<Tree>>>(iter);
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }

  /**
   * Reads scored parses from the charniak parser
   *
   * @param inputDesc - Description of input used in log messages
   * @param br - input reader
   * @return iterable with list of scored parse trees
   */
  public Iterable<List<ScoredObject<Tree>>> readScoredTrees(String inputDesc, BufferedReader br)
  {
    ScoredParsesIterator iter = new ScoredParsesIterator(inputDesc, br);
    return new IterableIterator<List<ScoredObject<Tree>>>(iter);
  }

  /**
   * Convert string representing scored parses (in the charniak parser output format)
   *   to list of scored parse trees
   * @param parseStr
   * @return list of scored parse trees
   */
  public List<ScoredObject<Tree>> stringToParses(String parseStr)
  {
    try {
      BufferedReader br = new BufferedReader(new StringReader(parseStr));
      Iterable<List<ScoredObject<Tree>>> trees = readScoredTrees("", br);
      List<ScoredObject<Tree>> res = null;
      if (trees != null) {
        Iterator<List<ScoredObject<Tree>>> iter = trees.iterator();
        if (iter != null && iter.hasNext()) {
          res = iter.next();
        }
      }
      br.close();
      return res;
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }


  /**
   * Convert list of scored parse trees to string representing scored parses
   *   (in the charniak parser output format)
   * @param parses - list of scored parse trees
   * @return string representing scored parses
   */
  public String parsesToString(List<ScoredObject<Tree>> parses)
  {
    if (parses == null) return null;
    StringOutputStream os = new StringOutputStream();
    PrintWriter pw = new PrintWriter(os);
    printScoredTrees(pw, 0, parses);
    pw.close();
    return os.toString();
  }

  /**
   * Print scored parse trees in format used by charniak parser
   * @param trees - trees to output
   * @param filename - file to output to
   */
  public void printScoredTrees(Iterable<List<ScoredObject<Tree>>> trees, String filename)
  {
    try {
      PrintWriter pw = IOUtils.getPrintWriter(filename);
      int i = 0;
      for (List<ScoredObject<Tree>> treeList:trees) {
        printScoredTrees(pw, i, treeList);
        i++;
      }
      pw.close();
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }

  /**
   * Print scored parse trees for one sentence in format used by charniak parser
   * @param pw - printwriter
   * @param id - sentence id
   * @param trees - trees to output
   */
  public void printScoredTrees(PrintWriter pw, int id, List<ScoredObject<Tree>> trees)
  {
    pw.println(trees.size() + "\t" + id);
    for (ScoredObject<Tree> scoredTree:trees) {
      pw.println(scoredTree.score());
      pw.println(scoredTree.object());
    }
  }

  private static class ScoredParsesIterator extends AbstractIterator<List<ScoredObject<Tree>>>
  {
    String inputDesc;
    BufferedReader br;
    List<ScoredObject<Tree>> next;
    Timing timing;
    int processed = 0;
    boolean done = false;
    boolean closeBufferNeeded = true;
    boolean expectConsecutiveSentenceIds = true;
    int lastSentenceId = -1;

    private ScoredParsesIterator(String filename) throws IOException {
      this(filename, IOUtils.getBufferedFileReader(filename));
    }


    private ScoredParsesIterator(String inputDesc, BufferedReader br) {
      this.inputDesc = inputDesc;
      this.br = br;
      logger.info("Reading cached parses from " + inputDesc);
      timing = new Timing();
      timing.start();
      next = getNext();
      done = next == null;
    }

    private List<ScoredObject<Tree>> getNext()
    {
      try {
        String line;
        int parsesExpected = 0;
        int sentenceId = lastSentenceId;
        ScoredObject<Tree> curParse = null;
        Double score = null;
        List<ScoredObject<Tree>> curParses = null;
        while ((line = br.readLine()) != null) {
          line = line.trim();
          if (line.length() > 0) {
            if (parsesExpected == 0) {
              // Finished processing parses
              String[] fields = wsDelimiter.split(line, 2);
              parsesExpected = Integer.parseInt(fields[0]);
              sentenceId = Integer.parseInt(fields[1]);
              if (expectConsecutiveSentenceIds) {
               if (sentenceId != lastSentenceId+1) {
                  if (lastSentenceId < sentenceId) {
                    StringBuilder sb = new StringBuilder("Missing sentences");
                    for (int i = lastSentenceId+1; i < sentenceId; i++) {
                      sb.append(" ").append(i);
                    }
                    logger.warning(sb.toString());
                  } else {
                    logger.warning("sentenceIds are not increasing (last="
                          + lastSentenceId + ", curr=" + sentenceId + ")");
                  }
                }
              }
              lastSentenceId = sentenceId;
              curParses = new ArrayList<ScoredObject<Tree>>(parsesExpected);
            } else {
              if (score == null) {
                // read score
                score = Double.parseDouble(line);
              } else {
                // Reading a parse
                curParse = new ScoredObject<Tree>(Trees.readTree(line), score);
                curParses.add(curParse);
                curParse = null;
                score = null;
                parsesExpected--;
                if (parsesExpected == 0) {
                  return curParses;
                }
              }
            }
          }
        }
      } catch (IOException ex) {
        throw new RuntimeException(ex);
      }
      return null;
    }

    public boolean hasNext()
    {
      return !done;
    }

    public List<ScoredObject<Tree>> next()
    {
      if (!done) {
        List<ScoredObject<Tree>> cur = next;
        next = getNext();
        processed++;
        if (next == null) {
          logger.finer("Read " + processed + " trees, from "
                  + inputDesc + " in " + timing.toSecondsString() + " secs");
          done = true;
          if (closeBufferNeeded) {
            try { br.close()} catch (IOException ex) {};
          }
        }
        return cur;
      } else {
        throw new NoSuchElementException("No more elements from " + inputDesc);
      }
    }
  }

}
TOP

Related Classes of edu.stanford.nlp.parser.charniak.CharniakScoredParsesReaderWriter$ScoredParsesIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.