Examples of TaggedStem


Examples of seekfeel.miners.features.TaggedStem

    @Override
    public LinkedHashMap<Integer, Double> computeFeatures(DataUnit example, ArrayList<Feature> features, CorpusHolder corpus) {
        LinkedHashMap<Integer, Double> featuresValues = new LinkedHashMap<Integer, Double>();
        ArrayList<Feature> taggedStems = new ArrayList<Feature>();
        TaggedStem temp;
        StanfordParser theParser = new StanfordParser(Language.Arabic);
        ParsingOptions opts = new ParsingOptions();
        opts.setPosTag(true);
        ArrayList<Word> textWords;
        ParsedData parsedOut;
        ArabicStemmerKhoja arabicStemmer = new ArabicStemmerKhoja();
        // Convert the Example into a tagged Stems Array
        textWords = StringToWordsTokenizer.tokenize(example.getDataBody());
        example.getDataBody();
        parsedOut = theParser.parse(textWords, opts);
        for (int i = 0; i < parsedOut.getTaggedWords().size(); i++) {
            temp = new TaggedStem();
            temp.setStemTag(parsedOut.getTaggedWords().get(i).getWordTag());
            temp.setTheStem(arabicStemmer.stem(textWords.get(i).word()));

            if (!taggedStems.contains(temp)) {
                taggedStems.add(temp);
            }
        }
View Full Code Here

Examples of seekfeel.miners.features.TaggedStem

    @Override
    public ArrayList<Feature> extractFeatures(CorpusHolder corpus) {
        ArrayList<DataUnit> positiveExs = corpus.getPositiveExamples();
        ArrayList<DataUnit> negativeExs = corpus.getNegativeExamples();
        ArrayList<Feature> taggedStems = new ArrayList<Feature>();
        TaggedStem temp;
        StanfordParser theParser = new StanfordParser(Language.Arabic);
        ParsingOptions opts = new ParsingOptions();
        opts.setPosTag(true);
        ArrayList<Word> textWords;
        ParsedData parsedOut;
        ArabicStemmerKhoja arabicStemmer = new ArabicStemmerKhoja();
        for (DataUnit textUnit : positiveExs) {
            textWords = StringToWordsTokenizer.tokenize(textUnit.getDataBody());
            textUnit.getDataBody();
            parsedOut = theParser.parse(textWords, opts);
            for (int i = 0; i < parsedOut.getTaggedWords().size(); i++) {
                temp = new TaggedStem();
                temp.setStemTag(parsedOut.getTaggedWords().get(i).getWordTag());
                temp.setTheStem(arabicStemmer.stem(textWords.get(i).word()));

                if (!taggedStems.contains(temp)) {
                    taggedStems.add(temp);
                }
            }
        }
        for (DataUnit textUnit : negativeExs) {
            textWords = StringToWordsTokenizer.tokenize(textUnit.getDataBody());
            textUnit.getDataBody();
            parsedOut = theParser.parse(textWords, opts);
            for (int i = 0; i < parsedOut.getTaggedWords().size(); i++) {
                temp = new TaggedStem();
                temp.setStemTag(parsedOut.getTaggedWords().get(i).getWordTag());
                temp.setTheStem(arabicStemmer.stem(textWords.get(i).word()));

                if (!taggedStems.contains(temp)) {
                    taggedStems.add(temp);
                }
            }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.