Package edu.stanford.nlp.time

Source Code of edu.stanford.nlp.time.SUTimeSimpleParser$SUTimeParsingError

package edu.stanford.nlp.time;

import java.util.List;
import java.util.Map;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.AnnotationPipeline;
import edu.stanford.nlp.pipeline.POSTaggerAnnotator;
import edu.stanford.nlp.pipeline.TokenizerAnnotator;
import edu.stanford.nlp.pipeline.WordsToSentencesAnnotator;
import edu.stanford.nlp.time.SUTime.Temporal;

import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;

/**
* Simple wrapper around SUTime for parsing lots of strings outside of Annotation objects.
*
* @author David McClosky
*/
public class SUTimeSimpleParser {

  private SUTimeSimpleParser() {} // static methods

  /**
   * Indicates that any exception occurred inside the TimeAnnotator.  This should only be caused by bugs in SUTime.
   */
  public static class SUTimeParsingError extends Exception {
    private static final long serialVersionUID = 1L;
    public String timeExpression;

    public SUTimeParsingError(String timeExpression) {
      this.timeExpression = timeExpression;
    }

    public String getLocalizedMessage() {
      return "Error while parsing '" + timeExpression + "'";
    }

  }

  private static AnnotationPipeline pipeline;
  private static Map<String, Temporal> cache;
  public static int calls = 0;
  public static int misses = 0;

  static {
    pipeline = makeNumericPipeline();
    cache = Generics.newHashMap();
  }

  private static AnnotationPipeline makeNumericPipeline() {
    AnnotationPipeline pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false, "en"));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    pipeline.addAnnotator(new POSTaggerAnnotator(false));
    pipeline.addAnnotator(new TimeAnnotator(true));

    return pipeline;
  }

  /**
   * Parse a string with SUTime.
   *
   * @throws SUTimeParsingError if anything goes wrong
   */
  public static Temporal parse(String str) throws SUTimeParsingError {
    try {
      Annotation doc = new Annotation(str);
      pipeline.annotate(doc);

      assert doc.get(CoreAnnotations.SentencesAnnotation.class) != null;
      assert doc.get(CoreAnnotations.SentencesAnnotation.class).size() > 0;
      List<CoreMap> timexAnnotations = doc.get(TimeAnnotations.TimexAnnotations.class);
      if (timexAnnotations.size() > 1) {
        throw new RuntimeException("Too many timexes for '" + str + "'");
      }
      CoreMap timex = timexAnnotations.get(0);

      return timex.get(TimeExpression.Annotation.class).getTemporal();
    } catch (Exception e) {
      SUTimeSimpleParser.SUTimeParsingError parsingError = new SUTimeSimpleParser.SUTimeParsingError(str);
      parsingError.initCause(e);
      throw parsingError;
    }
  }

  /**
   * Cached wrapper of parse method.
   */
  public static Temporal parseUsingCache(String str) throws SUTimeParsingError {
    calls++;
    if (!cache.containsKey(str)) {
      misses++;
      cache.put(str, parse(str));
    }

    return cache.get(str);
  }

  public static void main(String[] args) throws SUTimeParsingError {
    for (String s : new String[] {"1972", "1972-07-05", "0712", "1972-04"}) {
      System.out.println("String: " + s);
      Temporal timeExpression = parse(s);
      System.out.println("Parsed: " + timeExpression);
      System.out.println();
    }
  }

}
TOP

Related Classes of edu.stanford.nlp.time.SUTimeSimpleParser$SUTimeParsingError

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.