Package edu.stanford.nlp.international.arabic.pipeline

Source Code of edu.stanford.nlp.international.arabic.pipeline.UniversalPOSMapper

package edu.stanford.nlp.international.arabic.pipeline;

import java.io.*;
import java.util.List;
import java.util.Map;

import edu.stanford.nlp.international.arabic.ArabicMorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatures;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification.MorphoFeatureType;
import edu.stanford.nlp.util.Generics;

/**
* Maps LDC-provided Bies mappings to the Universal POS tag set described in
*   Slav Petrov, Dipanjan Das and Ryan McDonald. "A Universal Part-of-Speech Tagset."
* <p>
* Includes optional support for adding morphological annotations via the setup method.
*
* @author Spence Green
*
*/
public class UniversalPOSMapper extends LDCPosMapper {

  private final Map<String,String> universalMap;
  private final MorphoFeatureSpecification morphoSpec;
 
  public UniversalPOSMapper(){
    super(false); //Don't add the determiner split
   
    universalMap = Generics.newHashMap();
    morphoSpec = new ArabicMorphoFeatureSpecification();
  }
 
  /**
   * First map to the LDC short tags. Then map to the Universal POS. Then add
   * morphological annotations.
   */
  @Override
  public String map(String posTag, String terminal) {
    String rawTag = posTag.trim();

    String shortTag = tagsToEscape.contains(rawTag) ? rawTag : tagMap.get(rawTag);
    if( shortTag == null ) {
      System.err.printf("%s: No LDC shortened tag for %s%n", this.getClass().getName(), rawTag);
      return rawTag;
    }
   
    String universalTag = universalMap.get(shortTag);
    if( ! universalMap.containsKey(shortTag)) {
      System.err.printf("%s: No universal tag for LDC tag %s%n", this.getClass().getName(),shortTag);
      universalTag = shortTag;
    }
  
    MorphoFeatures feats = new MorphoFeatures(morphoSpec.strToFeatures(rawTag));
   
    String functionalTag = feats.getTag(universalTag);
   
    return functionalTag;
  }
 
  @Override
  public void setup(File path, String... options) {
    //Setup the Bies tag mapping
    super.setup(path, new String[0]);
   
    for(String opt : options) {
      String[] optToks = opt.split(":");
      if(optToks[0].equals("UniversalMap") && optToks.length == 2) {
        loadUniversalMap(optToks[1]);
     
      } else {
        //Maybe it's a morphological feature
        //Both of these calls will throw exceptions if the feature is illegal/invalid
        MorphoFeatureType feat = MorphoFeatureType.valueOf(optToks[0]);
        List<String> featVals = morphoSpec.getValues(feat);
        morphoSpec.activate(feat);
      }
    }
  }

  private void loadUniversalMap(String path) {
   
    LineNumberReader reader = null;
    try {
      reader = new LineNumberReader(new FileReader(path));
     
      for(String line; (line = reader.readLine()) != null;) {
        if(line.trim().equals("")) continue;
       
        String[] toks = line.trim().split("\\s+");
        if(toks.length != 2)
          throw new RuntimeException("Invalid mapping line: " + line);
       
        universalMap.put(toks[0], toks[1]);
      }
     
      reader.close();
   
    } catch (FileNotFoundException e) {
      System.err.printf("%s: File not found %s%n", this.getClass().getName(),path);
   
    } catch (IOException e) {
      int lineId = (reader == null) ? -1 : reader.getLineNumber();
      System.err.printf("%s: Error at line %d%n", this.getClass().getName(),lineId);
      e.printStackTrace();
    }
  }
}
TOP

Related Classes of edu.stanford.nlp.international.arabic.pipeline.UniversalPOSMapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.