Package edu.stanford.nlp.trees.treebank

Examples of edu.stanford.nlp.trees.treebank.Mapper


    System.err.println("Reading from: " + path.getPath());
    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
      TokenizerFactory<CoreLabel> tf = ArabicTokenizer.factory();
      tf.setOptions(tokOptions);
      Mapper lexMapper = new DefaultLexicalMapper();
      lexMapper.setup(null, "StripSegMarkersInUTF8", "StripMorphMarkersInUTF8");

      int lineId = 0;
      for(String line; (line = br.readLine()) != null; lineId++) {
        line = line.trim();

        // Tokenize with the tokenizer
        List<CoreLabel> tokenizedLine = tf.getTokenizer(new StringReader(line)).tokenize();
        System.out.println(Sentence.listToString(tokenizedLine));

        // Tokenize with the mapper
        StringBuilder sb = new StringBuilder();
        String[] toks = line.split("\\s+");
        for (String tok : toks) {
          String mappedTok = lexMapper.map(null, tok);
          sb.append(mappedTok).append(" ");
        }
        List<String> mappedToks = Arrays.asList(sb.toString().trim().split("\\s+"));

        // Evaluate the output
View Full Code Here


    }
    return sb.toString();
  }

  public static void main(String[] args) {
    Mapper mapper = new LDCPosMapper(true);
    File mapFile = new File("/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp");
    mapper.setup(mapFile);

    String test1 = "DET+NOUN+NSUFF_FEM_SG+CASE_DEF_ACC";
    String test2 = "ADJXXXXX";
    String test3 = "REL_ADV";
    String test4 = "NUMERIC_COMMA";

    System.out.printf("%s --> %s\n",test1,mapper.map(test1, null));
    System.out.printf("%s --> %s\n",test2,mapper.map(test2, null));
    System.out.printf("%s --> %s\n",test3,mapper.map(test3, null));
    System.out.printf("%s --> %s\n",test4,mapper.map(test4, null));
  }
View Full Code Here

    Matcher numMatcher = hasDigit.matcher(element);
    return !(numMatcher.find() || parentTagsToEscape.contains(parent));
  }

  public static void main(String[] args) {
    Mapper m = new DefaultLexicalMapper();

    System.out.printf("< :-> %s\n",m.map(null, "FNKqq"));
  }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.trees.treebank.Mapper

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.