// Package opennlp.ccg.parse
//
// Source code of opennlp.ccg.parse.Parser

///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2003-9 Jason Baldridge, Gann Bierner and Michael White
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//////////////////////////////////////////////////////////////////////////////

package opennlp.ccg.parse;

import opennlp.ccg.TextCCG;
import opennlp.ccg.lexicon.*;
import opennlp.ccg.synsem.*;
import opennlp.ccg.grammar.*;
import opennlp.ccg.hylo.EPsScorer;
import opennlp.ccg.hylo.HyloHelper;
import opennlp.ccg.hylo.Nominal;
import opennlp.ccg.unify.*;
import opennlp.ccg.util.Pair;

import java.util.*;
import java.util.prefs.Preferences;

/**
* The parser is a CKY chart parser for CCG, optionally 
* with iterative beta-best supertagging and n-best output.
*
* @author      Jason Baldridge
* @author      Gann Bierner
* @author      Michael White
* @version     $Revision: 1.38 $, $Date: 2011/08/27 19:27:00 $
*/
public class Parser
{
    /** 
     * Preference key for the time limit on parsing.  The preference is only 
     * consulted when no non-negative time limit has been set on the parser itself.
     */
    public static final String PARSE_TIME_LIMIT = "Parse Time Limit";
   
    /** A constant indicating no time limit on parsing; also the default for the time limit preference. */
    public static final int NO_TIME_LIMIT = 0;

    /** 
     * Preference key for the edge limit on parsing.  The preference is only 
     * consulted when no non-negative edge limit has been set on the parser itself.
     */
    public static final String PARSE_EDGE_LIMIT = "Parse Edge Limit";
   
    /** A constant indicating no edge limit on parsing; also the default for the edge limit preference. */
    public static final int NO_EDGE_LIMIT = 0;

    /** 
     * Preference key for pruning the number of signs kept per equivalence class.  
     * The preference is only consulted when no non-negative pruning value has been 
     * set on the parser itself.
     */
    public static final String PARSE_PRUNING_VALUE = "Parse Pruning Value";

    /** 
     * Preference key for pruning the number of edges kept per cell.  
     * The preference is only consulted when no non-negative cell pruning value has 
     * been set on the parser itself.
     */
    public static final String PARSE_CELL_PRUNING_VALUE = "Parse Cell Pruning Value";

    /** 
     * A constant indicating no pruning of signs per equivalence class.  
     * It is also used as the default for the cell pruning preference.
     */
    public static final int NO_PRUNING = 0;

    /** 
     * Preference key for whether to use lazy unpacking.  The preference defaults 
     * to true and is only consulted when the flag has not been set on the parser itself.
     */
    public static final String PARSE_LAZY_UNPACKING = "Parse Lazy Unpacking";
   
  /** The grammar. */
  public final Grammar grammar;
 
    /** The lexicon used to create edges (taken from the grammar in the constructor). */   
    public final Lexicon lexicon;
   
    /** The rules used to create edges (taken from the grammar in the constructor). */
    public final RuleGroup rules;
   
    /** Flag for whether to print the chart to standard output for failed parses. */
    public boolean debugParse = false;
   
  /** The sign scorer, which is handed to each new chart when non-null (or null if none). */
  protected SignScorer signScorer = null;
 
  /** 
   * The "n" for n-best pruning.  A negative value (the default) means the 
   * value is taken from the PARSE_PRUNING_VALUE preference when parsing. 
   */
  protected int pruneVal = -1;
 
  /** 
   * The cell pruning value.  A negative value (the default) means the 
   * value is taken from the PARSE_CELL_PRUNING_VALUE preference when parsing. 
   */
  protected int cellPruneVal = -1;
 
  /** 
   * The lazy unpacking flag.  Null (the default) means the flag is taken 
   * from the PARSE_LAZY_UNPACKING preference when parsing. 
   */
  protected Boolean lazyUnpacking = null;
 
    /** Supertagger to use; when non-null, parsing uses the iterative beta-best strategy. (Default is none.) */
    protected Supertagger supertagger = null;
   
    /** 
     * Flag for whether to use the supertagger in the most-to-least restrictive direction.  
     * When true, the beta is reset and then advanced with nextBeta(); when false, the 
     * beta is reset to its max and then moved back with previousBeta(). 
     */
    protected boolean stMostToLeastDir = true;
   
    /** 
     * Time limit in milliseconds.  A negative value (the default) means the 
     * limit is taken from the PARSE_TIME_LIMIT preference when parsing. 
     */
    protected int timeLimit = -1;
   
    /** 
     * Edge limit.  A negative value (the default) means the 
     * limit is taken from the PARSE_EDGE_LIMIT preference when parsing. 
     */
    protected int edgeLimit = -1;
   
    // start time for chart construction, from System.currentTimeMillis(); set in parseEntries
    private long startTime = 0;
   
    // lex lookup time, in milliseconds
    private int lexTime = 0;
   
    // parse time (chart construction plus unpacking), in milliseconds
    private int parseTime = 0;
   
    // chart construction time, in milliseconds
    private int chartTime = 0;
   
    // unpacking time, in milliseconds (parse time minus chart construction time)
    private int unpackingTime = 0;
   
    // time limit to use for the current parse (explicit setting or preference)
    private int timeLimitToUse = NO_TIME_LIMIT;
   
    // edge limit to use for the current parse (explicit setting or preference)
    private int edgeLimitToUse = NO_EDGE_LIMIT;
   
    // n-best pruning value to use for the current parse (explicit setting or preference)
    private int pruneValToUse = NO_PRUNING;
   
    // cell pruning value to use for the current parse (explicit setting or preference)
    private int cellPruneValToUse = NO_PRUNING;
   
    // lazy unpacking flag to use for the current parse (explicit setting or preference)
    private boolean lazyUnpackingToUse = true;
   
    // current chart; null until the first chart has been built
    private Chart chart = null;
   
    // parse results; null until results have been created for the first time
    private ArrayList<Sign> result;

    // parse scores, parallel to the parse results
    private ArrayList<Double> scores;

    // flag for whether to glue fragments currently; only turned on during supertagger-based parsing
    private boolean gluingFragments = false;
   
    /** Constructor. */
    public Parser(Grammar grammar) {
      this.grammar = grammar;
        this.lexicon = grammar.lexicon;
        this.rules = grammar.rules;
    }
   
  /** Sets the sign scorer. */
  public void setSignScorer(SignScorer signScorer) { this.signScorer = signScorer; }
 
  /** Sets the time limit. */
  public void setTimeLimit(int timeLimit) { this.timeLimit = timeLimit; }
 
  /** Sets the edge limit. */
  public void setEdgeLimit(int edgeLimit) { this.edgeLimit = edgeLimit; }
 
  /** Sets the n-best pruning val. */
  public void setPruneVal(int n) { pruneVal = n; }
 
  /** Sets the cell pruning val. */
  public void setCellPruneVal(int n) { cellPruneVal = n; }
 
  /** Sets the lazy unpacking flag. */
  public void setLazyUnpacking(Boolean b) { this.lazyUnpacking = b; }
 
  /** Sets the supertagger. */
  public void setSupertagger(Supertagger supertagger) { this.supertagger = supertagger; }

  /** Sets the supertagger most-to-least restrictive direction flag. */
  public void setSupertaggerMostToLeastRestrictiveDirection(boolean bool) {
    stMostToLeastDir = bool;
  }
 
    /**
     * Parses a string.
     *
     * @param s the string
     * @exception ParseException thrown if a parse can't be found for the
     *            entire string
     */
    public void parse(String s) throws ParseException {
        // tokenize
        List<Word> words = lexicon.tokenizer.tokenize(s);
        // parse words
        parse(words);
    }
   
    /**
     * Parses a list of words.
     */
    public void parse(List<Word> words) throws ParseException {
      // set up timing: use limit from prefs unless explicitly set
    Preferences prefs = Preferences.userNodeForPackage(TextCCG.class);
      if (timeLimit >= 0) timeLimitToUse = timeLimit;
      else timeLimitToUse = prefs.getInt(PARSE_TIME_LIMIT, NO_TIME_LIMIT);
      if (edgeLimit >= 0) edgeLimitToUse = edgeLimit;
      else edgeLimitToUse = prefs.getInt(PARSE_EDGE_LIMIT, NO_EDGE_LIMIT);
      if (pruneVal >= 0) pruneValToUse = pruneVal;
      else pruneValToUse = prefs.getInt(PARSE_PRUNING_VALUE, NO_PRUNING);
      if (cellPruneVal >= 0) cellPruneValToUse = cellPruneVal;
      else cellPruneValToUse = prefs.getInt(PARSE_CELL_PRUNING_VALUE, NO_PRUNING);
      if (lazyUnpacking != null) lazyUnpackingToUse = lazyUnpacking;
      else lazyUnpackingToUse = prefs.getBoolean(PARSE_LAZY_UNPACKING, true);
      // supertagger case: iterative beta-best
      if (supertagger != null) {
        parseWithSupertagger(words);
        return;
      }
      // otherwise just once
        try {
          // init
          long lexStartTime = System.currentTimeMillis();
            UnifyControl.startUnifySequence();
            // get entries for each word
            List<SignHash> entries = new ArrayList<SignHash>(words.size());
            for (Word w : words) {
              entries.add(lexicon.getSignsFromWord(w));
            }
            lexTime = (int) (System.currentTimeMillis() - lexStartTime);
            // do parsing
            parseEntries(entries);
        } catch (LexException e) {
          setGiveUpTime();
          String msg = "Unable to retrieve lexical entries:\n\t" + e.toString();
          if (debugParse) System.out.println(msg);
          throw new ParseException(msg);
        }
    catch (ParseException e) {
          setGiveUpTime();
      // show chart for failed parse if apropos
      if (debugParse) {
        System.out.println(e);
        System.out.println("Chart for failed parse:");
        chart.printChart();
      }
          // rethrow
      throw e;
    }
    }

    // iterative beta-best parsing
    private void parseWithSupertagger(List<Word> words) throws ParseException {
      // set supertagger in lexicon
      grammar.lexicon.setSupertagger(supertagger);
      // ensure gluing off
      gluingFragments = false;
      // reset beta
      if (stMostToLeastDir) supertagger.resetBeta();
      else supertagger.resetBetaToMax();
      // loop
      boolean done = false;
      while (!done) {
        try {
            // init
              long lexStartTime = System.currentTimeMillis();
              UnifyControl.startUnifySequence();
                // get filtered entries for each word
                List<SignHash> entries = new ArrayList<SignHash>(words.size());
                supertagger.mapWords(words);
                for (int i=0; i < words.size(); i++) {
                  supertagger.setWord(i);
                    Word word = words.get(i);
                entries.add(lexicon.getSignsFromWord(word));
                }
                lexTime = (int) (System.currentTimeMillis() - lexStartTime);
                // do parsing
                parseEntries(entries);
                // done
                done = true;
              // reset supertagger in lexicon, turn gluing off
              grammar.lexicon.setSupertagger(null);
              gluingFragments = false;
        }
        catch (LexException e) {
          // continue if more betas
          if (stMostToLeastDir && supertagger.hasMoreBetas()) {
            supertagger.nextBeta();
          }
          // otherwise give up
          else {
                setGiveUpTime();
                // reset supertagger in lexicon, turn gluing off
                grammar.lexicon.setSupertagger(null);
                  gluingFragments = false;
                // throw parse exception
                String msg = "Unable to retrieve lexical entries:\n\t" + e.toString();
                if (debugParse) System.out.println(msg);
                  throw new ParseException(msg);
          }
        }
        catch (ParseException e) {
          // check if limits exceeded
          boolean outwith = e.getMessage() == ParseException.EDGE_LIMIT_EXCEEDED ||
          e.getMessage() == ParseException.TIME_LIMIT_EXCEEDED;
          // continue if more betas and limits not exceeded
          if (stMostToLeastDir && supertagger.hasMoreBetas() && !outwith)
            supertagger.nextBeta();
          // or if limits exceeded and moving in the opposite direction
          else if (!stMostToLeastDir && supertagger.hasLessBetas() && outwith)
            supertagger.previousBeta();
          // otherwise try glue rule, unless already on
          else if (!gluingFragments) {
            supertagger.resetBeta(); // may as well use most restrictive supertagger setting with glue rule
            gluingFragments = true;
          }
          // otherwise give up
          else {
                setGiveUpTime();
            // show chart for failed parse if apropos
            if (debugParse) {
              System.out.println(e);
              System.out.println("Chart for failed parse:");
              chart.printChart();
            }
                // reset supertagger in lexicon, turn gluing off
                grammar.lexicon.setSupertagger(null);
                  gluingFragments = false;
                // rethrow
            throw e;
          }
        }
      }
    }
   
    /**
     * Returns the results of the parse.
     */
    public List<Sign> getResult() { return result; }

    /**
     * Returns the corresponding scores for the results of the parse.
     */
    public List<Double> getScores() { return scores; }

  /** Returns the edge count prior to unpacking. */
  public int edgeCount() { return (chart != null) ? chart.edgeCount() : 0; }
 
  /** Returns the edge count while unpacking. */
  public int unpackingEdgeCount() { return (chart != null) ? chart.unpackingEdgeCount() : 0; }

  /** Returns the max cell size prior to unpacking. */
  public int maxCellSize() { return (chart != null) ? chart.maxCellSize() : 0; }
 
  /** Returns the lexical lookup time for the latest parse. */
  public int getLexTime() { return lexTime; }
 
  /** Returns the overall parse time (but excluding lex lookup) for the latest parse. */
  public int getParseTime() { return parseTime; }
 
  /** Returns the time spent constructing the chart. */
  public int getChartTime() { return chartTime; }
 
  /** Returns the time spent unpacking. */
  public int getUnpackingTime() { return unpackingTime; }
 
  /** Returns the supertagger's final beta value (or 0 if none). */
  public double getSupertaggerBeta() {
    return (supertagger != null) ? supertagger.getCurrentBetaValue() : 0;
  }
 
 
    // parses from lex entries
    private void parseEntries(List<SignHash> entries) throws ParseException {
      startTime = System.currentTimeMillis();
        // set up chart
        initializeChart(entries);
        if (signScorer != null) chart.setSignScorer(signScorer);
        chart.setPruneVal(pruneValToUse);
        chart.setTimeLimit(timeLimitToUse);
        chart.setStartTime(startTime);
        chart.setEdgeLimit(edgeLimitToUse);
        chart.setCellLimit(cellPruneValToUse);
        // do parsing
        parse(entries.size());
    }
   
    // initialize the chart
    private void initializeChart(List<SignHash> entries) {
        chart = new Chart(entries.size(), rules);
        for (int i=0; i < entries.size(); i++) {
            SignHash wh = entries.get(i);
            for (Sign sign : wh.getSignsSorted()) {
                Category cat = sign.getCategory();
                UnifyControl.reindex(cat);
                chart.insert(i, i, sign);
            }
        }
    }
   
    // actual CKY parsing
    private void parse(int size) throws ParseException {
      // fill in chart
        for (int i=0; i<size; i++) {
          chart.insertCell(i,i);
        }
        for (int j=1; j<size; j++) {
            for (int i=j-1; i>=0; i--) {
                for (int k=i; k<j; k++) {
                  chart.insertCell(i,k, k+1,j, i,j);
                }
                chart.insertCell(i,j);
            }
        }
        // glue fragments if apropos
        if (gluingFragments && chart.cellIsEmpty(0, size-1)) {
            for (int j=1; j<size; j++) {
                for (int i=j-1; i>=0; i--) {
                    for (int k=i; k<j; k++) {
                      chart.insertCellFrag(i,k, k+1,j, i,j);
                    }
                }
            }
        }
        chartTime = (int) (System.currentTimeMillis() - startTime);
        // extract results
        createResult(size);
        parseTime = (int) (System.currentTimeMillis() - startTime);
        unpackingTime = parseTime - chartTime;
    }
 
    // create answer ArrayList
    private void createResult(int size) throws ParseException {
        result = new ArrayList<Sign>();
        scores = new ArrayList<Double>();
        // unpack top
        List<Edge> unpacked = (lazyUnpackingToUse)
          ? chart.lazyUnpack(0,size - 1)
      : chart.unpack(0, size - 1);
        // add signs for unpacked edges
        for (Edge edge : unpacked) {
          result.add(edge.sign);
          scores.add(edge.score);
        }
        // check non-empty
        if (result.size() == 0) {
            throw new ParseException("Unable to parse");
        }
    }
   
    // set parse time when giving up
    private void setGiveUpTime() {
        chartTime = (int) (System.currentTimeMillis() - startTime);
        parseTime = chartTime;
      unpackingTime = 0;
    }
   
    /**
     * Adds the supertagger log probs to the lexical signs of the gold standard parse.
     */
    public void addSupertaggerLogProbs(Sign gold) {
      List<Word> words = gold.getWords();
        supertagger.mapWords(words);
        addSupertaggerLogProbs(gold, gold);
        for (int i=0; i < words.size(); i++) {
          supertagger.setWord(i);
        }
    }

    // recurses through derivation, adding lex log probs to lexical signs
    private void addSupertaggerLogProbs(Sign gold, Sign current) {
      // lookup and add log prob for lex sign
      if (current.isLexical()) {
        supertagger.setWord(gold.wordIndex(current));
        Map<String,Double> stags = supertagger.getSupertags();
        Double lexprob = stags.get(current.getSupertag());
        if (lexprob != null) {
          current.addData(new SupertaggerAdapter.LexLogProb((float) Math.log10(lexprob)));
        }
      }
      // otherwise recurse
      else {
        Sign[] inputs = current.getDerivationHistory().getInputs();
        for (Sign s : inputs) addSupertaggerLogProbs(gold, s);
      }
    }
   
    /**
     * Returns the oracle best sign among those in the n-best list for the given LF,
     * using the f-score on all EPs, together with a flag indicating whether the gold LF
     * was found (as indicated by an f-score of 1.0).
     * NB: It would be better to return the forest oracle, but the nominal conversion would
     * be tricky to do correctly.
     */
    public Pair<Sign,Boolean> oracleBest(LF goldLF) {
      Sign retval = null; double bestF = 0.0;
      for (Sign sign : result) {
            Category cat = sign.getCategory().copy();
            Nominal index = cat.getIndexNominal();
            LF parsedLF = cat.getLF();
            if (parsedLF != null) {
              index = HyloHelper.convertNominals(parsedLF, sign, index);
          EPsScorer.Results score = EPsScorer.score(parsedLF, goldLF);
          if (score.fscore > bestF) {
            retval = sign;
            bestF = score.fscore;
          }
            }
      }
      return new Pair<Sign,Boolean>(retval, (bestF == 1.0));
    }
}

// TOP
//
// Related classes of opennlp.ccg.parse.Parser
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.