Examples of edu.stanford.nlp.parser.lexparser.LexicalizedParser

Package edu.stanford.nlp.parser.lexparser

Examples of edu.stanford.nlp.parser.lexparser.LexicalizedParser

edu.stanford.nlp.parser.lexparser.LexicalizedParser
This class provides the top-level API and command-line interface to a set of reasonably good treebank-trained parsers. The name reflects the main factored parsing model, which provides a lexicalized PCFG parser implemented as a product model of a plain PCFG parser and a lexicalized dependency parser. But you can also run either component parser alone. In particular, it is often useful to do unlexicalized PCFG parsing by using just that component parser.
See the package documentation for more details and examples of use.
For information on invoking the parser from the command line, and for a more detailed list of options, see the main method.
Note that training on a 1-million-word treebank requires a fair amount of memory. Try -mx1500m to increase the memory allocated by the JVM. Authors: Dan Klein (original version); Christopher Manning (better features, ParserParams, serialization); Roger Levy (internationalization); Teg Grenager (grammar compaction, tokenization, etc.); Galen Andrew (considerable refactoring); John Bauer (made threadsafe).

  }
  */


  // todo: rewrite to test all Chinese models, as for English
  public void testChineseTagSet() {
    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/chineseFactored.ser.gz");
    MaxentTagger tagger = new MaxentTagger("edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger");
    assertEquals("Chinese (Fact/distsim) parser/tagger tag set mismatch",
            lp.getLexicon().tagSet(lp.treebankLanguagePack().getBasicCategoryFunction()), tagger.tagSet());
  }

View Full Code Here

      tb.add(t);
    } else if (treeFileName != null) {
      tb.loadPath(treeFileName);
    } else {
      String[] options = {"-retainNPTmpSubcategories"};
      LexicalizedParser lp = LexicalizedParser.loadModel("/u/nlp/data/lexparser/englishPCFG.ser.gz", options);
      BufferedReader reader = null;
      try {
        reader = IOUtils.readerFromString(sentFileName);
      } catch (IOException e) {
        throw new RuntimeIOException("Cannot find or open " + sentFileName, e);
      }
      try {
        System.out.println("Processing sentence file " + sentFileName);
        for  (String line; (line = reader.readLine()) != null; ) {
          System.out.println("Processing sentence: " + line);
          PTBTokenizer<Word> ptb = PTBTokenizer.newPTBTokenizer(new StringReader(line));
          List<Word> words = ptb.tokenize();
          Tree parseTree = lp.parseTree(words);
          tb.add(parseTree);
        }
        reader.close();
      } catch (Exception e) {
        throw new RuntimeException("Exception reading key file " + sentFileName, e);

View Full Code Here

      }
    }


    System.err.println("Loading lexparser from: " + lexparserFile);
    String[] newArgs = unusedArgs.toArray(new String[unusedArgs.size()]);
    LexicalizedParser lexparser = LexicalizedParser.loadModel(lexparserFile, newArgs);
    System.err.println("... done");


    Treebank testTreebank = null;
    if (testTreebankPath != null) {
      System.err.println("Reading in trees from " + testTreebankPath);
      if (testTreebankFilter != null) {
        System.err.println("Filtering on " + testTreebankFilter);
      }
      testTreebank = lexparser.getOp().tlpParams.memoryTreebank();;
      testTreebank.loadPath(testTreebankPath, testTreebankFilter);
      System.err.println("Read in " + testTreebank.size() + " trees for testing");
    }


    double[] labelResults = new double[weights.length];
    double[] tagResults = new double[weights.length];


    for (int i = 0; i < weights.length; ++i) {
      lexparser.getOp().baseParserWeight = weights[i];
      EvaluateTreebank evaluator = new EvaluateTreebank(lexparser);
      evaluator.testOnTreebank(testTreebank);
      labelResults[i] = evaluator.getLBScore();
      tagResults[i] = evaluator.getTagScore();
    }

View Full Code Here

      throw new IllegalArgumentException("Need to specify -output");
    }


    String[] newArgs = unusedArgs.toArray(new String[unusedArgs.size()]);


    LexicalizedParser lexparser = LexicalizedParser.loadModel(modelPath, newArgs);


    Treebank testTreebank = null;
    if (testTreebankPath != null) {
      System.err.println("Reading in trees from " + testTreebankPath);
      if (testTreebankFilter != null) {
        System.err.println("Filtering on " + testTreebankFilter);
      }
      testTreebank = lexparser.getOp().tlpParams.memoryTreebank();;
      testTreebank.loadPath(testTreebankPath, testTreebankFilter);
      System.err.println("Read in " + testTreebank.size() + " trees for testing");
    }


    FileWriter out = new FileWriter(outputPath);
    BufferedWriter bout = new BufferedWriter(out);


    System.err.println("Parsing " + testTreebank.size() + " trees");
    int count = 0;
    List<ParseRecord> records = Generics.newArrayList();
    for (Tree goldTree : testTreebank) {
      List<Word> tokens = goldTree.yieldWords();
      ParserQuery parserQuery = lexparser.parserQuery();
      if (!parserQuery.parse(tokens)) {
        throw new AssertionError("Could not parse: " + tokens);
      }
      if (!(parserQuery instanceof RerankingParserQuery)) {
        throw new IllegalArgumentException("Expected a LexicalizedParser with a Reranker attached");

View Full Code Here

    File outputFile = new File(outputDir);
    FileSystem.checkNotExistsOrFail(outputFile);


    FileSystem.mkdirOrFail(outputFile);


    LexicalizedParser parser = LexicalizedParser.loadModel(modelPath);
    DVModel model = DVParser.getModelFromLexicalizedParser(parser);


    String binaryWDir = outputDir + File.separator + "binaryW";
    FileSystem.mkdirOrFail(binaryWDir);
    for (TwoDimensionalMap.Entry<String, String, SimpleMatrix> entry : model.binaryTransform) {

View Full Code Here

    if (parserFile == null) {
      System.err.println("Must specify a model file with -model");
      System.exit(2);
    }


    LexicalizedParser parser = LexicalizedParser.loadModel(parserFile);


    Set<String> tags = Generics.newTreeSet();
    for (String tag : parser.tagIndex) {
      tags.add(parser.treebankLanguagePack().basicCategory(tag));
    }
    System.out.println("Basic tags: " + tags.size());
    for (String tag : tags) {
      System.out.print("  " + tag);
    }
    System.out.println();
    System.out.println("All tags size: " + parser.tagIndex.size());


    Set<String> states = Generics.newTreeSet();
    for (String state : parser.stateIndex) {
      states.add(parser.treebankLanguagePack().basicCategory(state));
    }
    System.out.println("Basic states: " + states.size());
    for (String tag : states) {
      System.out.print("  " + tag);
    }

View Full Code Here

      //System.out.println(tree.pennString());
      checkTree(tree);


      // System.err.println("Four.");
      // check a tree generated by the parser
      LexicalizedParser parser = LexicalizedParser.loadModel();
      tree = parser.parse("Mary had a little lamb .");
      // System.out.println(tree.pennString());
      tree.indexLeaves();
      checkTree(tree);


    } catch (IOException e) {

View Full Code Here

      } else {
        extraArgs.add(args[argIndex++]);
      }
    }


    LexicalizedParser parser = LexicalizedParser.loadModel(input, extraArgs);
    parser.saveParserToSerialized(output);
  }

View Full Code Here

    }


    System.err.println("Averaging " + inputModelFilenames);
    System.err.println("Outputting result to " + outputModelFilename);


    LexicalizedParser lexparser = null;
    List<DVModel> models = Generics.newArrayList();
    for (String filename : inputModelFilenames) {
      LexicalizedParser parser = LexicalizedParser.loadModel(filename);
      if (lexparser == null) {
        lexparser = parser;
      }
      models.add(DVParser.getModelFromLexicalizedParser(parser));
    }

View Full Code Here

    // parser object (unless it's a text file parser?)
    Options op = new Options(ctpp);
    op.doDep = false;
    op.testOptions.maxLength = 90;


    LexicalizedParser lp;
    try {
      FileFilter trainFilt = new NumberRangesFileFilter(args[1], false);


      lp = LexicalizedParser.trainFromTreebank(args[0], trainFilt, op);
      try {
        String filename = "chineseCharTagPCFG.ser.gz";
        System.err.println("Writing parser in serialized format to file " + filename + " ");
        System.err.flush();
        ObjectOutputStream out = IOUtils.writeStreamFromString(filename);


        out.writeObject(lp);
        out.close();
        System.err.println("done.");
      } catch (IOException ioe) {
        ioe.printStackTrace();
      }
    } catch (IllegalArgumentException e) {
      lp = LexicalizedParser.loadModel(args[1], op);
    }


    FileFilter testFilt = new NumberRangesFileFilter(args[2], false);
    MemoryTreebank testTreebank = ctpp.memoryTreebank();
    testTreebank.loadPath(new File(args[0]), testFilt);
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true);
    WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
    WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
    EquivalenceClassEval eval = new EquivalenceClassEval(eqclass, eqcheck);
    //    System.out.println("Preterminals:" + preterminals);
    System.out.println("Testing...");
    for (Tree gold : testTreebank) {
      Tree tree;
      try {
        tree = lp.parseTree(gold.yieldHasWord());
        if (tree == null) {
          System.out.println("Failed to parse " + gold.yieldHasWord());
          continue;
        }
      } catch (Exception e) {

View Full Code Here

0 1

TOP

Related Classes of edu.stanford.nlp.parser.lexparser.LexicalizedParser

edu.stanford.nlp.ling.Word

edu.stanford.nlp.parser.common.ParserQuery

edu.stanford.nlp.parser.DependencyIndexITest

edu.stanford.nlp.parser.dvparser.AverageDVModels

edu.stanford.nlp.parser.dvparser.CacheParseHypotheses

edu.stanford.nlp.parser.dvparser.CombineDVModels

edu.stanford.nlp.parser.dvparser.CrossValidateTestOptions

edu.stanford.nlp.parser.dvparser.DumpMatrices

edu.stanford.nlp.parser.dvparser.DVParser

edu.stanford.nlp.parser.dvparser.FindNearestNeighbors

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.