Examples of Tagger


Examples of com.aliasi.tag.Tagger

  public void initialize(String text) {

    taggedTokens = new ArrayList<TaggedToken>();

    //Load the POS model:
    Tagger posTagger = lingPipeFactory.getPoSTaggerInstance();

    //1.) Tokenization
    long start = System.currentTimeMillis();
    List<String> tokenList = new ArrayList<String>();
    List<String> whiteList = new ArrayList<String>();
    Tokenizer tokenizer = lingPipeFactory.getTokenizerFactoryInstance().tokenizer(text.toCharArray(),
        0, text.length());
    tokenizer.tokenize(tokenList, whiteList);
    LOG.trace("Tokenization took " + (System.currentTimeMillis() - start) + "ms.");


    //2.) Sentence detection
    start = System.currentTimeMillis();
    String[] tokens = new String[tokenList.size()];
    String[] whites = new String[whiteList.size()];
    tokenList.toArray(tokens);
    whiteList.toArray(whites);

    SentenceModel sentenceModel = lingPipeFactory.getSentenceModelInstance();
    sentenceBoundaries = sentenceModel.boundaryIndices(tokens, whites);
    LOG.trace("Sentence segmentation took " + (System.currentTimeMillis() - start) + "ms.");


    //3.) Part-of-Speech tagging
    start = System.currentTimeMillis();
    int sentStartToken = 0;
    int sentEndToken;
    int textOffset = whites[0].length();


    /**
     * Tag every sentence that ends with final punctuation (i < sentenceBoundaries.length).
     * If text without final punctuation remains, treat the rest of the text as a single sentence.
     */

    for (int i = 0; (i < sentenceBoundaries.length || sentStartToken < tokens.length); ++i) {

      if (i < sentenceBoundaries.length) {
        //We are between two sentence-final punctuation tokens.

        sentEndToken = sentenceBoundaries[i];
      } else {
        //We are beyond the last sentence-final punctuation: Tag the rest of the text.

        sentEndToken = tokens.length - 1;
      }


      Tagging<String> tags = posTagger.tag(tokenList.subList(sentStartToken, sentEndToken + 1));
      for (int j = 0; j < tags.size(); j++) {
        TaggedToken taggedToken = new TaggedToken(tags.token(j), whiteList.get(sentStartToken + j + 1), tags.tag(j), textOffset, null);
        taggedTokens.add(taggedToken);
        textOffset += tokens[sentStartToken + j].length() + whites[sentStartToken + j + 1].length();
      }
View Full Code Here

Examples of com.cloudera.flume.handlers.rolling.Tagger

  @Test
  public void testCorrectFilename() throws IOException, InterruptedException,
      FlumeSpecException {
    CollectorSink sink = new CollectorSink(new Context(),
        "file:///tmp/flume-test-correct-filename", "actual-file-", 10000,
        new Tagger() {
          public String getTag() {
            return "tag";
          }

          public String newTag() {
View Full Code Here

Examples of com.cloudera.flume.handlers.rolling.Tagger

    // putting in large ridiculous constant
    NaiveFileWALManager wal = new NaiveFileWALManager(dir);
    wal.open();

    // get a sink write to it and then be done with it.
    Tagger t = new ProcessTagger();
    EventSink sink = wal.newWritingSink(t);
    sink.open();
    sink.append(new EventImpl("foo".getBytes()));
    assertEquals(1, wal.getWritingTags().size());
View Full Code Here

Examples of com.cloudera.flume.handlers.rolling.Tagger

    // putting in large ridiculous constant
    NaiveFileFailoverManager wal = new NaiveFileFailoverManager(dir);
    wal.open();

    // get a sink write to it and then be done with it.
    Tagger t = new ProcessTagger();
    EventSink sink = wal.newWritingSink(t);
    sink.open();
    sink.append(new EventImpl("foo".getBytes()));
    assertEquals(1, wal.getWritingTags().size());
View Full Code Here

Examples of com.cloudera.flume.handlers.rolling.Tagger

    // putting in large ridiculous constant
    NaiveFileFailoverManager wal = new NaiveFileFailoverManager(dir);
    wal.open();

    // get a sink write to it and then be done with it.
    Tagger t = new ProcessTagger();
    EventSink sink = wal.newWritingSink(t);
    sink.open();
    sink.append(new EventImpl("foo".getBytes()));
    assertEquals(1, wal.getWritingTags().size());
View Full Code Here

Examples of com.cloudera.flume.handlers.rolling.Tagger

  @Test
  public void testCorrectFilename() throws IOException, InterruptedException,
      FlumeSpecException {
    CollectorSink sink = new CollectorSink(new Context(),
        "file:///tmp/flume-test-correct-filename", "actual-file-", 10000,
        new Tagger() {
          public String getTag() {
            return "tag";
          }

          public String newTag() {
View Full Code Here

Examples of com.cloudera.flume.handlers.rolling.Tagger

    // putting in large ridiculous constant
    NaiveFileWALManager wal = new NaiveFileWALManager(dir);
    wal.open();

    // get a sink write to it and then be done with it.
    Tagger t = new ProcessTagger();
    EventSink sink = wal.newWritingSink(t);
    sink.open();
    sink.append(new EventImpl("foo".getBytes()));
    assertEquals(1, wal.getWritingTags().size());
View Full Code Here

Examples of com.cloudera.flume.handlers.rolling.Tagger

    // putting in large ridiculous constant
    NaiveFileWALManager wal = new NaiveFileWALManager(dir);
    wal.open();

    // get a sink write to it and then be done with it.
    Tagger t = new ProcessTagger();
    EventSink sink = wal.newWritingSink(t);
    sink.open();
    sink.append(new EventImpl("foo".getBytes()));
    assertEquals(1, wal.getWritingTags().size());
View Full Code Here

Examples of com.cloudera.flume.handlers.rolling.Tagger

   */
  @Test
  public void testCorrectFilename() throws IOException {
    CollectorSink sink = new CollectorSink(
        "file:///tmp/flume-test-correct-filename", "actual-file-", 10000,
        new Tagger() {
          public String getTag() {
            return "tag";
          }

          public String newTag() {
View Full Code Here

Examples of com.cloudera.flume.handlers.rolling.Tagger

    // putting in large ridiculous constant
    NaiveFileFailoverManager wal = new NaiveFileFailoverManager(dir);
    wal.open();

    // get a sink write to it and then be done with it.
    Tagger t = new ProcessTagger();
    EventSink sink = wal.newWritingSink(t);
    sink.open();
    sink.append(new EventImpl("foo".getBytes()));
    assertEquals(1, wal.getWritingTags().size());
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.