Package gannuNLP.corpus

Examples of gannuNLP.corpus.Corpus


        System.out.println("Dictionary uploaded!");
        System.out.println("Loading samples from SemCor files!");
        f=new File(args[2]);
        if(f.exists())
        {
          Corpus c=new Corpus(args[2],db, true);
          DataLoader.addSourceList(sourceList,c.getName());
          c.WriteSuperLemmas("./data/"+dict.getName()+"/");         
          System.out.println("Finished!");
        }
        else
        {
          System.out.println("Corpus not found!");
View Full Code Here


    this.name="CorpusMFS";
  }
  @Override
  public void init(Input document) throws Exception {
    if (CorpusMFS.corpus==null)
      CorpusMFS.corpus=new Corpus(this.getValue("corpus"),this.dict,Boolean.parseBoolean(this.getValue("includeNoTags")));
    if(this.getValue("osd")==null)
    {
      this.osd=true;
    }
    else
    {
      this.osd=Boolean.parseBoolean(this.getValue("osd"));
    }   
    this.current=new Corpus(CorpusMFS.corpus);   
    if(this.getValue("setup")!=null&&this.getValue("setup").equals("filterMFS"))
    {     
      this.current=new Corpus(CorpusMFS.corpus);
      String parameters="";
      if(this.getValue("threshold")==null)
      {
        parameters+="threshold:0.2;";
      }
View Full Code Here

    super("AddCorpusRelatedLemmas");
  }

  @Override
  public void init()throws Exception {
    AddCorpusRelatedLemmas.corpus=new Corpus(this.getValue("corpus"),this.dict,true);
    this.kw=new KeywordsByTFIDF();   
  }
View Full Code Here

   * This method removes all the duplicated words from all the bags of words of a target lemma.
   * @param lemma The target lemma.
   */
  public void modifyBow(Lemma lemma) throws Exception {
    ContainsLemmaFilter filter=new ContainsLemmaFilter("");   
    Corpus aux=new Corpus(corpus);
    filter.filter(aux, lemma);
    ArrayList<WSM> wsm=new ArrayList<WSM>();
    for(Input document:aux.getDocuments())
    {
      wsm.addAll(this.kw.extractKeywords(document, 10, true));
    }
    for(Sense s:lemma.getSenses())
    {
View Full Code Here

TOP

Related Classes of gannuNLP.corpus.Corpus

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle, Inc. Contact coftware#gmail.com.