Package com.code972.hebmorph.lemmafilters

Source Code of com.code972.hebmorph.lemmafilters.BasicLemmaFilter

/***************************************************************************
*   Copyright (C) 2010-2013 by                                            *
*      Itamar Syn-Hershko <itamar at code972 dot com>                     *
*    Ofer Fort <oferiko at gmail dot com> (initial Java port)           *
*                                                                         *
*   This program is free software; you can redistribute it and/or modify  *
*   it under the terms of the GNU Affero General Public License           *
*   version 3, as published by the Free Software Foundation.              *
*                                                                         *
*   This program is distributed in the hope that it will be useful,       *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*   GNU Affero General Public License for more details.                   *
*                                                                         *
*   You should have received a copy of the GNU Affero General Public      *
*   License along with this program; if not, see                          *
*   <http://www.gnu.org/licenses/>.                                       *
**************************************************************************/
package com.code972.hebmorph.lemmafilters;

import com.code972.hebmorph.HebrewToken;
import com.code972.hebmorph.Token;
import com.code972.hebmorph.hspell.Constants.DMask;

import java.util.List;


/**
* BasicLemmaFilter will only filter collections with more than one lemma. For them, any lemma
* scored below 0.7 is probably a result of some heavy toleration, and will be ignored.
*/
public class BasicLemmaFilter extends LemmaFilterBase {

    @Override
    public List<Token> filterCollection(final String word, final List<Token> collection, final List<Token> preallocatedOut) {
        if (collection.size() > 1) {
            final List<Token> ret = super.filterCollection(word, collection, preallocatedOut);
            if (ret != null && ret.size() > 0) {
                return ret;
            }
        }
        return null;
    }

    @Override
    public boolean isValidToken(final Token t) {
        if (t instanceof HebrewToken) {
            final HebrewToken ht = (HebrewToken) t;

            // Pose a minimum score limit for words
            if (ht.getScore() < 0.7f) {
                return false;
            }

            // Pose a higher threshold to verbs (easier to get irrelevant verbs from toleration)
            if (((ht.getMask() & DMask.D_TYPEMASK) == DMask.D_VERB) && (ht.getScore() < 0.85f)) {
                return false;
            }
        }
        return true;
    }
}
TOP

Related Classes of com.code972.hebmorph.lemmafilters.BasicLemmaFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.