Package seekfeel.miners.features

Examples of seekfeel.miners.features.UniGram


    /**
     * Collects the unigram features found in the positive and negative examples of a corpus.
     * <p>
     * Every non-empty word returned by {@code TextSplitter.splitTextToWords} is wrapped in a
     * {@code UniGram}. The first time a word is seen it is appended to the list with a count
     * of 1 for the side (positive or negative) it came from; when an equal entry is already
     * in the list, that entry's positive or negative counter is incremented instead.
     * <p>
     * NOTE(review): this listing is truncated (see the "View Full Code Here" marker below),
     * so the return statement and any further processing are not visible here.
     *
     * @param corpus supplies the positive and negative examples to scan
     */
    public ArrayList<Feature> extractFeatures(CorpusHolder corpus) {

        // Raw-typed list; elements are cast back to UniGram when they are looked up below.
        ArrayList unigrams = new ArrayList<Feature>();
        ArrayList<DataUnit> positiveExamples = corpus.getPositiveExamples();
        ArrayList<DataUnit> negativeExamples = corpus.getNegativeExamples();
        UniGram tempUniGram;

        ArrayList<String> words;
        // Pass 1: count words occurring in the positive examples.
        for (DataUnit posReview : positiveExamples) {
            words = TextSplitter.splitTextToWords(posReview.getDataBody());
            for (String word : words) {
                // Ignore empty tokens produced by the splitter.
                if (word.length() == 0) {
                    continue;
                }
                tempUniGram = new UniGram();
                tempUniGram.setUniGram(word);
                // contains()/indexOf() each scan the list using equals();
                // presumably UniGram.equals() compares the word text — TODO confirm.
                if (unigrams.contains(tempUniGram)) {
                    ((UniGram) unigrams.get(unigrams.indexOf(tempUniGram))).inCrementPositive();
                } else {
                    // First sighting of this word: register it with a positive count of 1.
                    tempUniGram.setPositiveCount(1);
                    unigrams.add(tempUniGram);
                }
            }

        }
        // Pass 2: same procedure for the negative examples, updating the negative counters.
        for (DataUnit negReview : negativeExamples) {
            words = TextSplitter.splitTextToWords(negReview.getDataBody());
            for (String word : words) {
                // Ignore empty tokens produced by the splitter.
                if (word.length() == 0) {
                    continue;
                }
                tempUniGram = new UniGram();
                tempUniGram.setUniGram(word);
                if (unigrams.contains(tempUniGram)) {
                    ((UniGram) unigrams.get(unigrams.indexOf(tempUniGram))).inCrementNegative();
                } else {
                    // Word was not seen in any earlier example: register it with a negative count of 1.
                    tempUniGram.setNegativeCount(1);
                    unigrams.add(tempUniGram);
                }
            }

        }
View Full Code Here


    }

    @Override
    public ArrayList<Feature> filter(int countThreshold, ArrayList<Feature> allFeatures) {
        ArrayList<Feature> filteredFeatures = new ArrayList<Feature>();
        UniGram tempFeat;
        for (Feature feat : allFeatures) {
            tempFeat = (UniGram) feat;
            if ((tempFeat.getPositiveCount() + tempFeat.getNegativeCount()) >= countThreshold) {
                filteredFeatures.add(tempFeat);
            }
        }
        return filteredFeatures;
    }
View Full Code Here

/**
 * Feature value computer for unigram features.
 * <p>
 * NOTE(review): the class name and the inline formula comment suggest a Delta TF-IDF
 * weighting, but the value actually stored for each matching feature is the constant
 * {@code 1.0} (see {@code computeFeatures}) — confirm the intended behavior.
 * This listing is truncated, so the end of the class is not visible here.
 */
public class DeltaTFIDFComputerUni extends FeatureValueComputer {

    /**
     * Produces the feature values for a single example.
     * <p>
     * The example's body is turned into a word-to-count map with
     * {@code TextSplitter.getWordsCount}. For every feature (cast to {@code UniGram}) whose
     * word is a key of that map, an entry is put into the result under the feature's index
     * in {@code features}. Features whose word does not occur in the example get no entry.
     *
     * @param example  the data unit whose body text is analysed
     * @param features the feature list; each element must be a {@code UniGram}
     * @param corpus   supplies the numbers of positive and negative examples
     * @return insertion-ordered map from feature index to feature value
     */
    @Override
    public LinkedHashMap<Integer, Double> computeFeatures(DataUnit example, ArrayList<Feature> features, CorpusHolder corpus) {
        LinkedHashMap<Integer, Double> featuresValues = new LinkedHashMap<Integer, Double>();
        UniGram tempUnigram;
        // The Equation used in calculating feature value :
        // V = Count * log ( NumPositiveExamples * Count of term in Negative corpus / NumNegativeExamples * Count of term in positive corpus)
        double featureValue;
        int countInDoc = 0;
        double countInPos;
        double countInNeg;
        // Floating-point division: a corpus with zero negatives yields Infinity or NaN here rather than an exception.
        double posToNegRatio = (double) corpus.getNumPositives() / (double) corpus.getNumNegatives();
        HashMap<String, Integer> wordsWithCount = TextSplitter.getWordsCount(example.getDataBody());

        // Earlier iterator-based variant, left disabled; it refers to names that are not declared in this method.
        /*while (wordsIterator.hasNext()) {
            tempEnt = wordsIterator.next();
            featIndex = uniGrams.indexOf(tempEnt.getKey());
            if (featIndex >= 0) {
                tempUnigram = (UniGram) features.get(featIndex);
                countInDoc = tempEnt.getValue();
                countInPos = tempUnigram.getPositiveCount() + 0.0001;
                countInNeg = tempUnigram.getNegativeCount() + 0.0001;
                featureValue = countInDoc * (Math.log((posToNegRatio * (countInNeg / countInPos))));
                featuresValues.put(featIndex, featureValue);
            }
        }*/
        // Walk the features by index so the index can serve as the key of the result map.
        for (int i = 0; i < features.size(); i++) {
            tempUnigram = (UniGram) features.get(i);
            if (wordsWithCount.containsKey(((UniGram)features.get(i)).getUniGram())) {
                countInDoc = wordsWithCount.get(((UniGram)features.get(i)).getUniGram());
                // The 0.0001 offset is presumably smoothing to keep the ratio and the
                // logarithm finite when a count is zero — TODO confirm.
                countInPos = tempUnigram.getPositiveCount() + 0.0001;
                countInNeg = tempUnigram.getNegativeCount() + 0.0001;
                featureValue = countInDoc * (Math.log((posToNegRatio * (countInNeg / countInPos))));
                // NOTE(review): featureValue computed above is never used; the constant 1.0 is
                // stored for every matching feature. Confirm whether this is intentional.
                featuresValues.put(i, 1.0);
            }
        }
        return featuresValues;
View Full Code Here

TOP

Related Classes of seekfeel.miners.features.UniGram

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.