Package edu.stanford.nlp.trees

Examples of edu.stanford.nlp.trees.TreeTransformer


      }
    }

    if (trainTreebankPath != null) {
      // TODO: make the transformer a member of the model?
      TreeTransformer transformer = buildTrainTransformer(dvparser.getOp());

      Treebank treebank = dvparser.getOp().tlpParams.memoryTreebank();;
      treebank.loadPath(trainTreebankPath, trainTreebankFilter);
      treebank = treebank.transform(transformer);
      System.err.println("Read in " + treebank.size() + " trees from " + trainTreebankPath);
View Full Code Here


    System.arraycopy(pieces, 0, args, 0, pieces.length - 1);
    String file = pieces[pieces.length - 1];
    Format format = Format.TEXT;
    String encoding = getEncoding(config);
    String tagSeparator = getTagSeparator(config);
    TreeTransformer treeTransformer = null;
    TreeNormalizer treeNormalizer = null;
    TreeReaderFactory trf = null;
    NumberRangesFileFilter treeRange = null;
    Predicate<Tree> treeFilter = null;
    Integer wordColumn = null, tagColumn = null;
View Full Code Here

    System.err.println("Loading parser model " + parserModel);
    System.err.println("Writing " + dvKBest + " hypothesis trees for each tree");

    LexicalizedParser parser = LexicalizedParser.loadModel(parserModel, "-dvKBest", Integer.toString(dvKBest));
    CacheParseHypotheses cacher = new CacheParseHypotheses(parser);
    TreeTransformer transformer = DVParser.buildTrainTransformer(parser.getOp());
    List<Tree> sentences = new ArrayList<Tree>();
    for (Pair<String, FileFilter> description : treebanks) {
      System.err.println("Reading trees from " + description.first);
      Treebank treebank = parser.getOp().tlpParams.memoryTreebank();
      treebank.loadPath(description.first, description.second);
View Full Code Here

      }
      String[] testArgs = (argMap.get("-test"));
      MemoryTreebank testTreebank = op.tlpParams.memoryTreebank();
      FileFilter testFilt = new NumberRangesFileFilter(testArgs[1], false);
      testTreebank.loadPath(new File(testArgs[0]), testFilt);
      TreeTransformer subcategoryStripper = op.tlpParams.subcategoryStripper();
      TreeTransformer collinizer = ctpp.collinizer();

      WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
      WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
      EquivalenceClassEval basicEval = new EquivalenceClassEval(eqclass, eqcheck, "basic");
      EquivalenceClassEval collinsEval = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
      List<String> evalTypes = new ArrayList<String>(3);
      boolean goodPOS = false;
      if (segmentWords) {
        evalTypes.add(WordCatConstituent.wordType);
        if (ctpp.segmentMarkov && !parse) {
          evalTypes.add(WordCatConstituent.tagType);
          goodPOS = true;
        }
      }
      if (parse) {
        evalTypes.add(WordCatConstituent.tagType);
        evalTypes.add(WordCatConstituent.catType);
        if (combo) {
          evalTypes.add(WordCatConstituent.wordType);
          goodPOS = true;
        }
      }
      TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);

      System.err.println("Testing...");
      for (Tree goldTop : testTreebank) {
        Tree gold = goldTop.firstChild();
        List<HasWord> goldSentence = gold.yieldHasWord();
        if (goldSentence.size() > maxLength) {
          System.err.println("Skipping sentence; too long: " + goldSentence.size());
          continue;
        } else {
          System.err.println("Processing sentence; length: " + goldSentence.size());
        }
        List<HasWord> s;
        if (segmentWords) {
          StringBuilder goldCharBuf = new StringBuilder();
          for (Iterator<HasWord> wordIter = goldSentence.iterator(); wordIter.hasNext();) {
            StringLabel word = (StringLabel) wordIter.next();
            goldCharBuf.append(word.value());
          }
          String goldChars = goldCharBuf.toString();
          s = seg.segment(goldChars);
        } else {
          s = goldSentence;
        }
        Tree tree;
        if (parse) {
          tree = lp.parseTree(s);
          if (tree == null) {
            throw new RuntimeException("PARSER RETURNED NULL!!!");
          }
        } else {
          tree = Trees.toFlatTree(s);
          tree = subcategoryStripper.transformTree(tree);
        }

        if (pw != null) {
          if (parse) {
            tree.pennPrint(pw);
          } else {
            Iterator sentIter = s.iterator();
            for (; ;) {
              Word word = (Word) sentIter.next();
              pw.print(word.word());
              if (sentIter.hasNext()) {
                pw.print(" ");
              } else {
                break;
              }
            }
          }
          pw.println();
        }

        if (eval) {
          Collection ourBrackets, goldBrackets;
          ourBrackets = proc.allBrackets(tree);
          goldBrackets = proc.allBrackets(gold);
          if (goodPOS) {
            ourBrackets.addAll(proc.commonWordTagTypeBrackets(tree, gold));
            goldBrackets.addAll(proc.commonWordTagTypeBrackets(gold, tree));
          }

          basicEval.eval(ourBrackets, goldBrackets);
          System.out.println("\nScores:");
          basicEval.displayLast();

          Tree collinsTree = collinizer.transformTree(tree);
          Tree collinsGold = collinizer.transformTree(gold);
          ourBrackets = proc.allBrackets(collinsTree);
          goldBrackets = proc.allBrackets(collinsGold);
          if (goodPOS) {
            ourBrackets.addAll(proc.commonWordTagTypeBrackets(collinsTree, collinsGold));
            goldBrackets.addAll(proc.commonWordTagTypeBrackets(collinsGold, collinsTree));
View Full Code Here

          ReflectionLoading.loadByReflection(classes[0], this);
      } else if (classes.length > 1) {
        CompositeTreeTransformer composite = new CompositeTreeTransformer();
        trainOptions.preTransformer = composite;
        for (String clazz : classes) {
          TreeTransformer transformer =
            ReflectionLoading.loadByReflection(clazz, this);
          composite.addTransformer(transformer);
        }
      }
    } else if (args[i].equalsIgnoreCase("-taggedFiles") && (i + 1 < args.length)) {
View Full Code Here

    pwOut.println("GOLD TREEBANK:");
    pwOut.println(goldTreebank.textualSummary());

    final CollinsDepEval depEval = new CollinsDepEval("CollinsDep", true, tlpp.headFinder(), tlpp.treebankLanguagePack().startSymbol());

    final TreeTransformer tc = tlpp.collinizer();

    //PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
    //don't match, we need to keep looking for the next gold tree that matches.
    //The evalb ref implementation differs slightly as it expects one tree per line. It assigns
    //status as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.

    final Iterator<Tree> goldItr = goldTreebank.iterator();
    int goldLineId = 0;
    int skippedGuessTrees = 0;

    for(final Tree guess : guessTreebank) {
      final Tree evalGuess = tc.transformTree(guess);
      if(guess.yield().size() > MAX_GUESS_YIELD) {
        skippedGuessTrees++;
        continue;
      }

      boolean doneEval = false;
      while(goldItr.hasNext() && !doneEval) {
        final Tree gold = goldItr.next();
        final Tree evalGold = tc.transformTree(gold);
        goldLineId++;

        if(gold.yield().size() > MAX_GOLD_YIELD) {
          continue;
View Full Code Here

    pwOut.println(goldTreebank.textualSummary());

    final String evalName = (tagMode) ? "TsarfatyTAG" : "TsarfatySEG";
    final TsarfatyEval eval = new TsarfatyEval(evalName, tagMode);

    final TreeTransformer tc = tlpp.collinizer();

    //PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
    //don't match, we need to keep looking for the next gold tree that matches.
    //The evalb ref implementation differs slightly as it expects one tree per line. It assigns
    //status as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.

    final Iterator<Tree> goldItr = goldTreebank.iterator();
    int goldLineId = 0;
    int skippedGuessTrees = 0;

    for(final Tree guess : guessTreebank) {
      final Tree evalGuess = tc.transformTree(guess);
      final ArrayList<Label> guessSent = guess.yield();
      final String guessChars = Sentence.listToString(guessSent).replaceAll("\\s+","");
      if(guessSent.size() > maxGuessYield) {
        skippedGuessTrees++;
        continue;
      }

      boolean doneEval = false;
      while(goldItr.hasNext() && !doneEval) {
        final Tree gold = goldItr.next();
        final Tree evalGold = tc.transformTree(gold);
        goldLineId++;

        final ArrayList<Label> goldSent = gold.yield();
        final String goldChars = Sentence.listToString(goldSent).replaceAll("\\s+","");
View Full Code Here

    if(args.length != 1) {
      System.err.println("Usage: java " + FTBCorrector.class.getName() + " filename\n");
      System.exit(-1);
    }
   
    TreeTransformer tt = new FTBCorrector();
   
    File f = new File(args[0]);
    try {
      //These bad trees in the Candito training set should be thrown out:
      //  (ROOT (SENT (" ") (. .)))
      //  (ROOT (SENT (. .)))
      TregexPattern pBadTree = TregexPattern.compile("@SENT <: @PUNC");
      TregexPattern pBadTree2 = TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
     
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
      TreeReaderFactory trf = new FrenchTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(br);
  
      int nTrees = 0;
      for(Tree t; (t = tr.readTree()) != null;nTrees++) {
        TregexMatcher m = pBadTree.matcher(t);
        TregexMatcher m2 = pBadTree2.matcher(t);
        if(m.find() || m2.find()) {
          System.err.println("Discarding tree: " + t.toString());
        } else {
          Tree fixedT = tt.transformTree(t);
          System.out.println(fixedT.toString());
        }
      }
     
      tr.close();
View Full Code Here

    Queue<String> fNameQueue = new LinkedList<String>(Arrays.asList(fNames));

    TregexPattern pBadTree = TregexPattern.compile("@SENT <: @PUNC");
    TregexPattern pBadTree2 = TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
   
    final TreeTransformer tt = new FTBCorrector();

    int size = fSizeQueue.remove();
    String filename = fNameQueue.remove();

    System.err.println("Outputing " + filename);

    PrintWriter writer =
      new PrintWriter(new BufferedWriter
                      (new OutputStreamWriter
                       (new FileOutputStream(filename), "UTF-8")));

    int outputCount = 0;
    for (String id : ids) {
      if (!treeMap.containsKey(id)) {
        System.err.println("Missing id: " + id);
        continue;
      }

      Tree tree = treeMap.get(id);
      TregexMatcher m = pBadTree.matcher(tree);
      TregexMatcher m2 = pBadTree2.matcher(tree);
      if(m.find() || m2.find()) {
        System.err.println("Discarding tree: " + tree.toString());
        continue;
      }
     
      // Punctuation normalization, etc.
      Tree backupCopy = tree.deepCopy();
      tree = tt.transformTree(tree);
      if (tree.firstChild().children().length == 0) {
        // Some trees have only punctuation. Tregex will mangle these. Don't throw those away.
        System.err.println("Saving tree: " + tree.toString());
        System.err.println("Backup: " + backupCopy.toString());
        tree = backupCopy;
View Full Code Here

    pwOut.println("GOLD TREEBANK:");
    pwOut.println(goldTreebank.textualSummary());

    final Evalb metric = new Evalb("Evalb LP/LR", true);
    final EvalbByCat evalbCat = (doCatLevel) ? new EvalbByCat("EvalbByCat LP/LR", true, labelRegex) : null;
    final TreeTransformer tc = tlpp.collinizer();

    //The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    final Iterator<Tree> goldItr = goldTreebank.iterator();
    final Iterator<Tree> guessItr = guessTreebank.iterator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;
    while( guessItr.hasNext() && goldItr.hasNext() ) {
      Tree guessTree = guessItr.next();
      List<Label> guessYield = guessTree.yield();
      guessLineId++;

      Tree goldTree = goldItr.next();
      List<Label> goldYield = goldTree.yield();
      goldLineId++;

      // Check that we should evaluate this tree
      if(goldYield.size() > maxGoldYield) {
        skippedGuessTrees++;
        continue;
      }

      // Only trees with equal yields can be evaluated
      if(goldYield.size() != guessYield.size()) {
        pwOut.printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.size(), guessYield.size(), goldLineId, guessLineId);
        skippedGuessTrees++;
        continue;
      }

      final Tree evalGuess = tc.transformTree(guessTree);
      final Tree evalGold = tc.transformTree(goldTree);

      metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));

      if(doCatLevel) evalbCat.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
      if(sortByF1) storeTrees(queue,guessTree,goldTree,metric.getLastF1());
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.trees.TreeTransformer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.