Package weka.core.stemmers

Examples of weka.core.stemmers.ArabicStemmerKhoja


        StanfordParser theParser = new StanfordParser(Language.Arabic);
        ParsingOptions opts = new ParsingOptions();
        opts.setPosTag(true);
        ArrayList<Word> textWords;
        ParsedData parsedOut;
        ArabicStemmerKhoja arabicStemmer = new ArabicStemmerKhoja();
        // Convert the Example into a tagged Stems Array
        textWords = StringToWordsTokenizer.tokenize(example.getDataBody());
        example.getDataBody();
        parsedOut = theParser.parse(textWords, opts);
        for (int i = 0; i < parsedOut.getTaggedWords().size(); i++) {
            temp = new TaggedStem();
            temp.setStemTag(parsedOut.getTaggedWords().get(i).getWordTag());
            temp.setTheStem(arabicStemmer.stem(textWords.get(i).word()));

            if (!taggedStems.contains(temp)) {
                taggedStems.add(temp);
            }
        }
View Full Code Here


        StanfordParser theParser = new StanfordParser(Language.Arabic);
        ParsingOptions opts = new ParsingOptions();
        opts.setPosTag(true);
        ArrayList<Word> textWords;
        ParsedData parsedOut;
        ArabicStemmerKhoja arabicStemmer = new ArabicStemmerKhoja();
        for (DataUnit textUnit : positiveExs) {
            textWords = StringToWordsTokenizer.tokenize(textUnit.getDataBody());
            textUnit.getDataBody();
            parsedOut = theParser.parse(textWords, opts);
            for (int i = 0; i < parsedOut.getTaggedWords().size(); i++) {
                temp = new TaggedStem();
                temp.setStemTag(parsedOut.getTaggedWords().get(i).getWordTag());
                temp.setTheStem(arabicStemmer.stem(textWords.get(i).word()));

                if (!taggedStems.contains(temp)) {
                    taggedStems.add(temp);
                }
            }
        }
        for (DataUnit textUnit : negativeExs) {
            textWords = StringToWordsTokenizer.tokenize(textUnit.getDataBody());
            textUnit.getDataBody();
            parsedOut = theParser.parse(textWords, opts);
            for (int i = 0; i < parsedOut.getTaggedWords().size(); i++) {
                temp = new TaggedStem();
                temp.setStemTag(parsedOut.getTaggedWords().get(i).getWordTag());
                temp.setTheStem(arabicStemmer.stem(textWords.get(i).word()));

                if (!taggedStems.contains(temp)) {
                    taggedStems.add(temp);
                }
            }
View Full Code Here

TOP

Related Classes of weka.core.stemmers.ArabicStemmerKhoja

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.