Package org.dbpedia.spotlight.spot.cooccurrence.filter

Examples of org.dbpedia.spotlight.spot.cooccurrence.filter.FilterTermsize


  public List<SurfaceFormOccurrence> select(List<SurfaceFormOccurrence> surfaceFormOccurrences) {

    List<SurfaceFormOccurrence> selectedOccurrences = new LinkedList<SurfaceFormOccurrence>();

    FilterPOS filterPOS = new FilterPOS();
    FilterTermsize unigramFilter = new FilterTermsize(FilterTermsize.Termsize.unigram);
    FilterPattern filterPattern = new FilterPattern();

    SpotClassifier unigramClassifier = ClassifierFactory.getClassifierInstanceUnigram();
    SpotClassifier ngramClassifier = ClassifierFactory.getClassifierInstanceNGram();

    assert unigramClassifier != null;
    assert ngramClassifier != null;

    //ngramClassifier.setVerboseMode(true);
    //unigramClassifier.setVerboseMode(true);
    List<String> decisions = new LinkedList<String>();

    for(SurfaceFormOccurrence surfaceFormOccurrence : surfaceFormOccurrences) {

            if (surfaceFormOccurrence.surfaceForm().name().trim().length()==0) {
                LOG.warn("I have an occurrence with empty surface form. :-O Ignoring.");
                LOG.error(surfaceFormOccurrence);
                continue;
            }

            if (! (surfaceFormOccurrence.context() instanceof TaggedText)) { //FIXME added this to avoid breaking, but code below will never run if we don't pass the taggedtext
                LOG.error(String.format("SurfaceFormOccurrence did not contain TaggedText. Cannot apply %s",this.getClass()));
       
                selectedOccurrences.add(surfaceFormOccurrence);
                continue;
            }


      if(unigramFilter.applies(surfaceFormOccurrence)) {

        /**
         * Unigram (n = 1)
         */

 
View Full Code Here


    instanceBuilder = InstanceBuilderFactory.createInstanceBuilderNGram(
        configuration.getSpotterConfiguration().getCoOcSelectorDatasource(), dataProvider);
    instanceBuilder.setVerboseMode(true);

    /** Filter the data set: */
    FilterTermsize filterTermsize = new FilterTermsize(FilterTermsize.Termsize.unigram, spotlightFactory.textUtil());
    filterTermsize.inverse();

    FilterPattern filterPattern = new FilterPattern();

    filters.add(filterTermsize);
    filters.add(filterPattern);
View Full Code Here

        configuration.getSpotterConfiguration().getCoOcSelectorDatasource(),
        dataProvider);
   
    instanceBuilder.setVerboseMode(true);

    filters.add(new FilterTermsize(FilterTermsize.Termsize.unigram));
    filters.add(new FilterPOS());
    filters.add(new FilterPattern());

    header = new Instances("UnigramTraining", buildAttributeList(), buildAttributeList().size());
View Full Code Here

TOP

Related Classes of org.dbpedia.spotlight.spot.cooccurrence.filter.FilterTermsize

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.