Package edu.stanford.nlp.international.french.scripts

Source Code of edu.stanford.nlp.international.french.scripts.TreeToMorfette

package edu.stanford.nlp.international.french.scripts;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.List;

import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory;
import edu.stanford.nlp.util.Pair;

/**
* Writes out an FTB tree file in s-notation to Morfette format.
*
* @author Spence Green
*
*/
public class TreeToMorfette {

  /**
   * @param args
   */
  public static void main(String[] args) {
    if (args.length != 1) {
      System.err.printf("Usage: java %s tree_file%n", TreeToMorfette.class.getName());
      System.exit(-1);
    }

    String treeFile = args[0];
   
    TreeReaderFactory trf = new FrenchTreeReaderFactory();
    try {
      TreeReader tr = trf.newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8")));
 
      for (Tree tree1; (tree1 = tr.readTree()) != null;) {
        List<Label> pretermYield = tree1.preTerminalYield();
        List<Label> yield = tree1.yield();
        int yieldLen = yield.size();
        for (int i = 0; i < yieldLen; ++i) {
          CoreLabel rawToken = (CoreLabel) yield.get(i);
          String word = rawToken.value();
          String morphStr = rawToken.originalText();
          Pair<String,String> lemmaMorph = MorphoFeatureSpecification.splitMorphString(word, morphStr);
          String lemma = lemmaMorph.first();
          String morph = lemmaMorph.second();
          if (morph == null || morph.equals("") || morph.equals("XXX")) {
            morph = ((CoreLabel) pretermYield.get(i)).value();
          }
          System.out.printf("%s %s %s%n", word, lemma, morph);
        }
        System.out.println();
      }
   
      tr.close();
     
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
TOP

Related Classes of edu.stanford.nlp.international.french.scripts.TreeToMorfette

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.