Examples of ParsedData


Examples of com.findwise.utils.tika.ParsedData

                URL url = it.next();
                URLConnection connection = createConnection(url);
                final InputStream inputStream = connection.getInputStream();
                try {
                    String prefix = field + num + "_";
                    ParsedData parsedData = inputStreamParser.parse(inputStream);
                    documentParserHelper.addParsedDataToDocument(parsedData, doc, prefix);
                } finally {
                    inputStream.close();
                }
            }
View Full Code Here

Examples of com.findwise.utils.tika.ParsedData

    }

    private void enrichDocumentWithFileContents(LocalDocument doc, InputStream stream) throws IOException,
            SAXException, TikaException {
        InputStreamParser inputStreamParser = new InputStreamParser();
        ParsedData parsedData = inputStreamParser.parse(stream);

        addTextToDocument(doc, parsedData.getContent());
        addMetadataToDocument(doc, parsedData.getMetadata());
    }
View Full Code Here

Examples of com.findwise.utils.tika.ParsedData

        DocumentParserHelper documentParserHelper = new DocumentParserHelper(addMetaData, addLanguage);
        for (String fileName : files) {
            DocumentFile<Local> df = doc.getFile(fileName);
            String prefix = fileName.replace('.', '_') + "_";
            InputStreamParser inputStreamParser = new InputStreamParser(parser);
            ParsedData parsedData = inputStreamParser.parse(df.getStream());
            documentParserHelper.addParsedDataToDocument(parsedData, doc, prefix);
        }
    }
View Full Code Here

Examples of seekfeel.utilities.stanfordwrapper.ParsedData

    public Sentiment classifyText(String text) {
        ArrayList<Word> allWords = StringToWordsTokenizer.tokenize(text);
        ParsingOptions popts = new ParsingOptions();
        popts.setParseRelations(true);
        popts.setPosTag(true);
        ParsedData result = sParser.parse(allWords, popts);
        return classifyTextByNouns(result, allWords);
    }
View Full Code Here

Examples of seekfeel.utilities.stanfordwrapper.ParsedData

        feature = textAndFeat[1];
        ArrayList<Word> Words = StringToWordsTokenizer.tokenize(text);
        ParsingOptions opts = new ParsingOptions();
        opts.setPosTag(true);
        opts.setParseRelations(true);
        ParsedData resultData = sParser.parse(Words, opts);
        Word featureWord = new Word(feature);
        int featIndex = Words.indexOf(featureWord);
        Sentiment sentimentResult = Sentiment.Neutral;
        if (!feature.equals("") && featIndex != -1) {
            WordPos theFeature = resultData.getWordByIndex(featIndex);
            return classifyTextByFeatures(resultData, Words, theFeature);
        }
        // sentimentResult = classifyTextByNouns(resultData, Words);
        if (sentimentResult == Sentiment.Neutral) {
            sentimentResult = classifyTextByWords(resultData, Words);
View Full Code Here

Examples of seekfeel.utilities.stanfordwrapper.ParsedData

        ParsingOptions opts = new ParsingOptions();
        opts.setPosTag(true);
        opts.setNeededTags(neededTags);

        ParsedData outData = theParser.parse(allWords, opts);
        ArrayList<WordPos> taggedWords = outData.getTaggedWords();
        SWNEntryKey tempFeatKey;
        SentimentFeature tempFeature;

        for (WordPos word : taggedWords) {
            tempFeatKey = new SWNEntryKey(allWords.get(word.getWordIndex()).word(), TagParser.parseTagToGeneral(word.getWordTag()));
View Full Code Here

Examples of seekfeel.utilities.stanfordwrapper.ParsedData

        TaggedStem temp;
        StanfordParser theParser = new StanfordParser(Language.Arabic);
        ParsingOptions opts = new ParsingOptions();
        opts.setPosTag(true);
        ArrayList<Word> textWords;
        ParsedData parsedOut;
        ArabicStemmerKhoja arabicStemmer = new ArabicStemmerKhoja();
        // Convert the Example into a tagged Stems Array
        textWords = StringToWordsTokenizer.tokenize(example.getDataBody());
        example.getDataBody();
        parsedOut = theParser.parse(textWords, opts);
        for (int i = 0; i < parsedOut.getTaggedWords().size(); i++) {
            temp = new TaggedStem();
            temp.setStemTag(parsedOut.getTaggedWords().get(i).getWordTag());
            temp.setTheStem(arabicStemmer.stem(textWords.get(i).word()));

            if (!taggedStems.contains(temp)) {
                taggedStems.add(temp);
            }
View Full Code Here

Examples of seekfeel.utilities.stanfordwrapper.ParsedData

        neededTags.add(PosTag.VBG);
        neededTags.add(PosTag.VBN);
        neededTags.add(PosTag.VBP);
        neededTags.add(PosTag.VBZ);

        ParsedData parsingResult = null;
        ParsingOptions opts = new ParsingOptions();
        opts.setNeededTags(neededTags);
        opts.setPosTag(true);

View Full Code Here

Examples of seekfeel.utilities.stanfordwrapper.ParsedData

        TaggedStem temp;
        StanfordParser theParser = new StanfordParser(Language.Arabic);
        ParsingOptions opts = new ParsingOptions();
        opts.setPosTag(true);
        ArrayList<Word> textWords;
        ParsedData parsedOut;
        ArabicStemmerKhoja arabicStemmer = new ArabicStemmerKhoja();
        for (DataUnit textUnit : positiveExs) {
            textWords = StringToWordsTokenizer.tokenize(textUnit.getDataBody());
            textUnit.getDataBody();
            parsedOut = theParser.parse(textWords, opts);
            for (int i = 0; i < parsedOut.getTaggedWords().size(); i++) {
                temp = new TaggedStem();
                temp.setStemTag(parsedOut.getTaggedWords().get(i).getWordTag());
                temp.setTheStem(arabicStemmer.stem(textWords.get(i).word()));

                if (!taggedStems.contains(temp)) {
                    taggedStems.add(temp);
                }
            }
        }
        for (DataUnit textUnit : negativeExs) {
            textWords = StringToWordsTokenizer.tokenize(textUnit.getDataBody());
            textUnit.getDataBody();
            parsedOut = theParser.parse(textWords, opts);
            for (int i = 0; i < parsedOut.getTaggedWords().size(); i++) {
                temp = new TaggedStem();
                temp.setStemTag(parsedOut.getTaggedWords().get(i).getWordTag());
                temp.setTheStem(arabicStemmer.stem(textWords.get(i).word()));

                if (!taggedStems.contains(temp)) {
                    taggedStems.add(temp);
                }
View Full Code Here

Examples of seekfeel.utilities.stanfordwrapper.ParsedData

        ParsingOptions popts = new ParsingOptions();
        popts.setParseRelations(true);
        popts.setPosTag(true);

        ParsedData result = sParser.parse(allWords, popts);
        ArrayList<WordPos> wordsPos = result.getTaggedWords();

        setTextDependencyRelations(result.getDependencyRelations());
        tokenizedSentence = new ArrayList<token>();

        int size = allWords.size();

        for (int i = 0; i < size; i++) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.