Package com.jgaap.util

Examples of com.jgaap.util.Document


  }
 
  public static List<Document> getDocumentsFromCSV(List<List<String>> documentCSV) throws Exception{
    List<Document> documents = new ArrayList<Document>();
    for(List<String> documentRow : documentCSV){
      Document document = new Document(documentRow.get(1),documentRow.get(0),(documentRow.size()>2?documentRow.get(2):null));
      documents.add(document);
    }
    return documents;
  }
View Full Code Here


    known2.addEvent(new Event("a", null));
    known2.addEvent(new Event("peck", null));
    //known2.setAuthor("Peter");
   
    List<Document> knowns = new ArrayList<Document>();
    Document knownDocument1 = new Document();
    knownDocument1.setAuthor("Mary");
    knownDocument1.addEventSet(null, known1);
    knowns.add(knownDocument1);
    Document knownDocument2 = new Document();
    knownDocument2.setAuthor("Peter");
    knownDocument2.addEventSet(null, known2);
    knowns.add(knownDocument2);

    //Create unknown text
    EventSet unknown1 = new EventSet();

    unknown1.addEvent(new Event("mary", null));
    unknown1.addEvent(new Event("had", null));
    unknown1.addEvent(new Event("a", null));
    unknown1.addEvent(new Event("little", null));
    unknown1.addEvent(new Event("beta", null));

    Document unknownDocument = new Document();
    unknownDocument.addEventSet(null, unknown1);

    //Classify unknown based on the knowns
    WEKAMultilayerPerceptron tree = new WEKAMultilayerPerceptron();
    List<List<Pair<String, Double>>> t = new ArrayList<List<Pair<String,Double>>>();
    tree.train(knowns);
    t.add(tree.analyze(unknownDocument));
    System.out.println(t.toString());

    //Assert that the authors match
    assertTrue(t.get(0).get(0).getFirst().equals("Mary"));
   
   
    //Test 2 - Add in third known author

    EventSet known5 = new EventSet();
   
    known5.addEvent(new Event("she", null));
    known5.addEvent(new Event("sells", null));
    known5.addEvent(new Event("seashells", null));
    known5.addEvent(new Event("by", null));
    known5.addEvent(new Event("seashore", null));
    //known5.setAuthor("Susie");

    Document knownDocument5 = new Document();
    knownDocument5.setAuthor("Susie");
    knownDocument5.addEventSet(null, known5);
    knowns.add(knownDocument5);
   
    t = new ArrayList<List<Pair<String,Double>>>();
    tree.train(knowns);
    t.add(tree.analyze(unknownDocument));
    System.out.println(t.toString());

    assertTrue(t.get(0).get(0).getFirst().equals("Mary"));
   

    //Test 3 - Add in another unknown

    EventSet unknown2 = new EventSet();

    unknown2.addEvent(new Event("peter", null));
    unknown2.addEvent(new Event("piper", null));
    unknown2.addEvent(new Event("picked", null));
    unknown2.addEvent(new Event("a", null));
    unknown2.addEvent(new Event("shells", null));

    Document unknownDocument2 = new Document();
    unknownDocument2.addEventSet(null, unknown2);

    t = new ArrayList<List<Pair<String,Double>>>();
    tree.train(knowns);
    t.add(tree.analyze(unknownDocument));
    t.add(tree.analyze(unknownDocument2));
    System.out.println(t.toString());

    assertTrue(t.get(0).get(0).getFirst().equals("Mary") && t.get(1).get(0).getFirst().equals("Peter"));
   
    // Test 6 - Test unknown that is almost equally likely to be of two authors
   
    EventSet unknown3 = new EventSet();
   
    unknown3.addEvent(new Event("peter", null));
    unknown3.addEvent(new Event("piper", null));
    unknown3.addEvent(new Event("a", null));
    unknown3.addEvent(new Event("little", null));
    unknown3.addEvent(new Event("lamb", null));
   
    Document unknownDocument3 = new Document();
    unknownDocument3.addEventSet(null, unknown3);
   
    //t = tree.analyze(uesv, esv);
    tree = new WEKAMultilayerPerceptron();
    t = new ArrayList<List<Pair<String,Double>>>();
    tree.train(knowns);
    t.add(tree.analyze(unknownDocument3));
    System.out.println(t.toString());
   
    assertTrue(t.get(0).get(0).getSecond()-.5 < .1 && t.get(0).get(1).getSecond()-.5 < .1);
   
    // Test 5 - Add in more known documents for existing authors
    EventSet known3 = new EventSet();
    EventSet known4 = new EventSet();
    EventSet known6 = new EventSet();
   
    known3.addEvent(new Event("mary", null));
    known3.addEvent(new Event("had", null));
    known3.addEvent(new Event("a", null));
    known3.addEvent(new Event("small", null));
    known3.addEvent(new Event("lamb", null));
    //known3.setAuthor("Mary");

    known4.addEvent(new Event("peter", null));
    known4.addEvent(new Event("piper", null));
    known4.addEvent(new Event("collected", null));
    known4.addEvent(new Event("a", null));
    known4.addEvent(new Event("peck", null));
    //known4.setAuthor("Peter");
   
    known6.addEvent(new Event("susie", null));
    known6.addEvent(new Event("sells", null));
    known6.addEvent(new Event("shells", null));
    known6.addEvent(new Event("by", null));
    known6.addEvent(new Event("seashore", null));
    //known6.setAuthor("Susie");

    Document knownDocument3 = new Document();
    knownDocument3.setAuthor("Mary");
    knownDocument3.addEventSet(null, known3);
    knowns.add(knownDocument3);
    Document knownDocument4 = new Document();
    knownDocument4.setAuthor("Peter");
    knownDocument4.addEventSet(null, known4);
    knowns.add(knownDocument4);
    Document knownDocument6 = new Document();
    knownDocument6.setAuthor("Susie");
    knownDocument6.addEventSet(null, known6);
    knowns.add(knownDocument6);
   
    tree = new WEKAMultilayerPerceptron();
    t = new ArrayList<List<Pair<String,Double>>>();
    tree.train(knowns);
View Full Code Here

    unknown.addEvent(new Event("white", null));
    unknown.addEvent(new Event("as", null));
    unknown.addEvent(new Event("snow.", null));

    List<Document> knowns = new ArrayList<Document>();
    Document knownDocument1 = new Document();
    knownDocument1.setAuthor("Mary");
    knownDocument1.addEventSet(null, known1);
    knowns.add(knownDocument1);
    Document knownDocument2 = new Document();
    knownDocument2.setAuthor("Peter");
    knownDocument2.addEventSet(null, known2);
    knowns.add(knownDocument2);
   
    Document unknownDocument = new Document();
    unknownDocument.addEventSet(null, unknown);
   
    AnalysisDriver analysisDriver = new MahalanobisDistance();
    analysisDriver.train(knowns);
    List<Pair<String, Double>> t = analysisDriver.analyze(unknownDocument);
    for(Pair<String, Double> element : t){
View Full Code Here

   * @return - a reference to the document generated
   * @throws Exception - if there is a problem loading the document from file web or parsing file format
   */
  public Document addDocument(String filepath, String author, String title)
      throws Exception {
    Document document = new Document(filepath, author, title);
    return addDocument(document);
  }
View Full Code Here

      }else {
        Iterator<Future<Document>> documentIterator = documentsProcessing.iterator();
        while(documentIterator.hasNext()){
          Future<Document> futureDocument = documentIterator.next();
          if(futureDocument.isDone()){
            Document document = futureDocument.get();
            if(document.hasFailed()){
              throw new Exception("One or more documents could not be read / parsed / canonicized Experiment Failed");
            }
            logger.info("Document: "+document.getTitle()+" has finished processing.");
            documentIterator.remove();
          }
        }
      }
    }
View Full Code Here

TOP

Related Classes of com.jgaap.util.Document

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.