Package kpi.asoiu.parsers

Source Code of kpi.asoiu.parsers.ParsePhrase

package kpi.asoiu.parsers;

import kpi.asoiu.factory.DAOFactory;
import kpi.asoiu.model.PartsOfSentence;
import kpi.asoiu.model.Phrase;
import kpi.asoiu.model.Word;
import kpi.asoiu.model.test.PhraseLevelEntity;
import opennlp.tools.parser.Parse;

import java.util.*;

/**
* Created by IntelliJ IDEA.
* User: Dara
* Date: 01.04.12
* Time: 17:39
* To change this template use File | Settings | File Templates.
*/
public class ParsePhrase {
    private static List<PhraseLevelEntity> phrases;

    List<String> objectTypes = Arrays.asList("IN", "NP", "NN", "NS", "NNS");
    List<String> predicateTypes = Arrays.asList("VBZ", "VBP", "VBN", "VP");
    List<String> subjectTypes = Arrays.asList("NNP", "NN", "NP", "NS", "NNS", "PRP");
    Map<PartsOfSentence, List<String>> types = new HashMap<PartsOfSentence, List<String>>();

    public ParsePhrase() {
        phrases = DAOFactory.getDao().getPhraseLevelEntities();
        types.put(PartsOfSentence.OBJECT, objectTypes);
        types.put(PartsOfSentence.SUBJECT, subjectTypes);
        types.put(PartsOfSentence.PREDICATE, predicateTypes);

    }

    public Phrase parse(Parse parse, PartsOfSentence partsOfSentence) {
        Phrase phrase = new Phrase();
        collect(parse, phrase, partsOfSentence);
//        System.out.println("!!! phrase = " + phrase);
        return phrase;
    }

    private void postProcess(Phrase phrase) {
        List<Word> newWords = new ArrayList<Word>();
        if (phrase.getChildren().size() == 1) {
            newWords = phrase.getChildren().get(0).getWords();
        } else {
            for (int i = 0; i < phrase.getChildren().size(); i++) {
                for (int j = i + 1; j < phrase.getChildren().size(); j++) {
                    newWords = mergeWords(phrase.getChildren().get(i).getWords(), phrase.getChildren().get(j).getWords());
                }
            }
        }
        phrase.setWords(mergeWords(phrase.getWords(), newWords));
    }

    private List<Word> mergeWords(List<Word> words1, List<Word> words2) {
        List<Word> newWords = new ArrayList<Word>();
        if (words1.isEmpty()) {
            newWords.addAll(words2);
        } else if (words2.isEmpty()) {
            newWords.addAll(words1);
        } else {
            for (Word word : words1) {
                for (Word word2 : words2) {
                    Word w = new Word();
                    w.setWord(word.getWord() + " " + word2.getWord());
                    w.setPartOfSentence(word.getPartOfSentence());
                    w.setPartOfLanguage(word.getPartOfLanguage());
                    newWords.add(w);
                }
            }
        }
        return newWords;
    }

    private Phrase collect(Parse p, Phrase phrase, PartsOfSentence partsOfSentence) {
        for (Parse parse : p.getChildren()) {
            System.out.println(parse.getType() + "_" + parse.getLabel() + "-" + parse.getHead() + " = " + partsOfSentence + "      " + parse);
            if (isParseIsPhrase(parse)) {
                phrase.getChildren().add(collect(parse, new Phrase(), partsOfSentence));
            } else {
                if (types.get(partsOfSentence).contains(parse.getType())) {
                    Word w = new Word();
                    w.setWord(parse.toString());
                    w.setPartOfLanguage(parse.getType());
                    w.setPartOfSentence(partsOfSentence);
                    phrase.getWords().add(w);
                }
            }
        }
        postProcess(phrase);
        return phrase;
    }

    public static boolean isParseIsPhrase(Parse p) {
        if (p.getType() != null) {
            for (PhraseLevelEntity phraseLevelEntity : phrases) {
                if (phraseLevelEntity.getName().equals(p.getType())) {
                    return true;
                }
            }
        }

        return false;
    }
}
TOP

Related Classes of kpi.asoiu.parsers.ParsePhrase

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.