Package opennlp.tools.parser.chunking

Source Code of opennlp.tools.parser.chunking.CheckContextGenerator

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreemnets.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package opennlp.tools.parser.chunking;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import opennlp.tools.parser.AbstractContextGenerator;
import opennlp.tools.parser.Parse;

/**
* Class for generating predictive context for deciding when a constituent is complete.
*/
public class CheckContextGenerator extends AbstractContextGenerator {

  /**
   * Creates a new context generator for generating predictive context for deciding when a constituent is complete.
   */
  public CheckContextGenerator() {
    super();
  }

  public String[] getContext(Object o) {
    Object[] params = (Object[]) o;
    return getContext((Parse[]) params[0], (String) params[1], ((Integer) params[2]).intValue(), ((Integer) params[3]).intValue());
  }

  /**
   * Returns predictive context for deciding whether the specified constituents between the specified start and end index
   * can be combined to form a new constituent of the specified type.
   * @param constituents The constituents which have yet to be combined into new constituents.
   * @param type The type of the new constituent proposed.
   * @param start The first constituent of the proposed constituent.
   * @param end The last constituent of the proposed constituent.
   * @return The predictive context for deciding whether a new constituent should be created.
   */
  public String[] getContext(Parse[] constituents, String type, int start, int end) {
    int ps = constituents.length;
    List<String> features = new ArrayList<String>(100);

    //default
    features.add("default");
    //first constituent label
    features.add("fl="+constituents[0].getLabel());
    Parse pstart = constituents[start];
    Parse pend = constituents[end];
    checkcons(pstart, "begin", type, features);
    checkcons(pend, "last", type, features);
    StringBuffer production = new StringBuffer(20);
    StringBuffer punctProduction = new StringBuffer(20);
    production.append("p=").append(type).append("->");
    punctProduction.append("pp=").append(type).append("->");
    for (int pi = start; pi < end; pi++) {
      Parse p = constituents[pi];
      checkcons(p, pend, type, features);
      production.append(p.getType()).append(",");
      punctProduction.append(p.getType()).append(",");
      Collection<Parse> nextPunct = p.getNextPunctuationSet();
      if (nextPunct != null) {
        for (Iterator<Parse> pit=nextPunct.iterator();pit.hasNext();) {
          Parse punct = pit.next();
          punctProduction.append(punct.getType()).append(",");
        }
      }
    }
    production.append(pend.getType());
    punctProduction.append(pend.getType());
    features.add(production.toString());
    features.add(punctProduction.toString());
    Parse p_2 = null;
    Parse p_1 = null;
    Parse p1 = null;
    Parse p2 = null;
    Collection<Parse> p1s = constituents[end].getNextPunctuationSet();
    Collection<Parse> p2s = null;
    Collection<Parse> p_1s = constituents[start].getPreviousPunctuationSet();
    Collection<Parse> p_2s = null;
    if (start - 2 >= 0) {
      p_2 = constituents[start - 2];
    }
    if (start - 1 >= 0) {
      p_1 = constituents[start - 1];
      p_2s = p_1.getPreviousPunctuationSet();
    }
    if (end + 1 < ps) {
      p1 = constituents[end + 1];
      p2s = p1.getNextPunctuationSet();
    }
    if (end + 2 < ps) {
      p2 = constituents[end + 2];
    }
    surround(p_1, -1, type, p_1s, features);
    surround(p_2, -2, type, p_2s, features);
    surround(p1, 1, type, p1s, features);
    surround(p2, 2, type, p2s, features);

    return features.toArray(new String[features.size()]);
  }
}
TOP

Related Classes of opennlp.tools.parser.chunking.CheckContextGenerator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.