Package edu.stanford.nlp.sempre.paraphrase.rules

Source Code of edu.stanford.nlp.sempre.paraphrase.rules.VerbSemClassMatcher$VerbSemClassMatch

package edu.stanford.nlp.sempre.paraphrase.rules;

import java.util.LinkedList;
import java.util.List;
import java.util.Properties;

import com.google.common.base.Joiner;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation;
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class VerbSemClassMatcher {

  // TOKEN REGEX TO MATCH VERB AND SEMANTIC CLASS
  //matching non-wh objects
  public static final TokenSequencePattern p1 = TokenSequencePattern.compile
      (
          "[ { word:\"what\"} ] ([ { word:/.*/ } ]* ) [ { lemma:\"be\" } | {lemma:\"do\"} ] [ { word:/.*/ } ]* " +
          "[ { tag:/NNP.*/ } ]+ ([ { tag:/VB.*/} ] ) [ { word:/.*/ } ]* ");
  //matching wh objects
  public static final TokenSequencePattern p2 = TokenSequencePattern.compile
      (
          "( [ { word:/when|where|who/} ]) [ { word:/.*/ } ]*  [ { lemma:\"be\" } | {lemma:\"do\"} ] [ { word:/.*/ } ]* [ { tag:/NNP.*/ } ]+ "  +
          "([ { tag:/VB.*/} ] ) [ { word:/.*/ } ]* ");
  //matching non-wh subjects
  public static final TokenSequencePattern p3 = TokenSequencePattern.compile
      (
          "[ { word:\"what\"} ] ([ { tag:\"NN\" } | { tag:\"NNS\" }] ) "  +
          "([ { tag:/VB.*/} & !{lemma:/be|do|have/}] ) [ { word:/.*/ } ]* [ { tag:/NNP.*/ } ]+ [ { word:/.*/ } ]* ");
  //matching wh subjects
  public static final TokenSequencePattern p4 = TokenSequencePattern.compile
      (
          "([ { word:\"who\"} ]) ([ { tag:/VB.*/} & !{lemma:/be|do|have/} ] ) [ { word:/.*/ } ]* [ { tag:/NNP.*/ } ]+ [ { word:/.*/ } ]* ");

  private Properties props = new Properties();
  private StanfordCoreNLP pipeline; 
  private static VerbSemClassMatcher extractor = null;

  public static VerbSemClassMatcher getInstance() {
    if(extractor==null) extractor =  new VerbSemClassMatcher();
    return extractor;
  }

  private VerbSemClassMatcher() {
    props.put("annotators", "tokenize, ssplit, pos, lemma");
    props.put("pos.model", "edu/stanford/nlp/models/pos-tagger/english-caseless-left3words-distsim.tagger"); //caseless model
    pipeline = new StanfordCoreNLP(props);
  }

  public VerbSemClassMatch match(String sentence) {

    Annotation annotation = new Annotation(sentence.toLowerCase());
    pipeline.annotate(annotation);
    TokenSequenceMatcher m1 = p1.getMatcher(annotation.get(CoreAnnotations.TokensAnnotation.class));
    if(m1.find())
      return new VerbSemClassMatch(extractMatch(m1.groupNodes(2)), extractMatch(m1.groupNodes(1)), false);
    TokenSequenceMatcher m2 = p2.getMatcher(annotation.get(CoreAnnotations.TokensAnnotation.class));
    if(m2.find())
      return new VerbSemClassMatch(extractMatch(m2.groupNodes(2)), extractMatch(m2.groupNodes(1)), false);
    TokenSequenceMatcher m3 = p3.getMatcher(annotation.get(CoreAnnotations.TokensAnnotation.class));
    if(m3.find())
      return new VerbSemClassMatch(extractMatch(m3.groupNodes(2)), extractMatch(m3.groupNodes(1)), true);
    TokenSequenceMatcher m4 = p3.getMatcher(annotation.get(CoreAnnotations.TokensAnnotation.class));
    if(m4.find())
      return new VerbSemClassMatch(extractMatch(m4.groupNodes(2)), extractMatch(m4.groupNodes(1)), true);
    return null;
  }

  public String extractMatch(List<? extends CoreMap> maps) {
    List<String> res = new LinkedList<String>();
    for(CoreMap map: maps)
      res.add(map.get(LemmaAnnotation.class));
    return Joiner.on(' ').join(res);
  }

  public static class VerbSemClassMatch {

    public final String verb;
    public final String semClass;
    public final boolean isSubj;

    public VerbSemClassMatch(String verb, String semClass, boolean isSubj) {
      this.verb = verb;
      this.semClass = semClass;
      this.isSubj = isSubj;
    }   
   
    public String toString() {
      return verb+"\t"+semClass+"\t"+isSubj;
    }
  }
}
TOP

Related Classes of edu.stanford.nlp.sempre.paraphrase.rules.VerbSemClassMatcher$VerbSemClassMatch

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.