Package uk.ac.ucl.panda.utility.structure

Examples of uk.ac.ucl.panda.utility.structure.DocData


        System.out.println("ignoring date parse exception (assigning 'now') for: "+props.getProperty("date"));
        date = new Date(); // now
      }
    }
     
    return new DocData(name, bodyBuf.toString(), title, props, date);
  }
View Full Code Here


  /* (non-Javadoc)
   * @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument()
   */
  public Document makeDocument () throws Exception {
    resetLeftovers();
    DocData docData = getNextDocData();
    ///////////////////
    if(docData==null)return null;
    ///////////////////
    Document doc = createDocument(docData,0,-1);
    return doc;
View Full Code Here

  public Document makeDocument(int size) throws Exception {
    LeftOver lvr = (LeftOver) leftovr.get();
    if (lvr==null || lvr.docdata==null || lvr.docdata.getBody()==null || lvr.docdata.getBody().length()==0) {
      resetLeftovers();
    }
    DocData dd = (lvr==null ? getNextDocData() : lvr.docdata);
    int cnt = (lvr==null ? 0 : lvr.cnt);
    while (dd.getBody()==null || dd.getBody().length()<size) {
      DocData dd2 = dd;
      dd = getNextDocData();
      cnt = 0;
      dd.setBody(dd2.getBody() + dd.getBody());
    }
    Document doc = createDocument(dd,size,cnt);
    if (dd.getBody()==null || dd.getBody().length()==0) {
      resetLeftovers();
    } else {
View Full Code Here

    // 6. collect until end of doc
    sb = read("</DOC>",null,false,true);
    // this is the next document, so parse it
    Date date = new Date();
    HTMLParser p = getHtmlParser();
    DocData docData = p.parse(name, date, sb, getDateFormat(0));
    addBytes(sb.length()); // count char length of parsed html text (larger than the plain doc body text).
   
    return docData;
  }
View Full Code Here

TOP

Related Classes of uk.ac.ucl.panda.utility.structure.DocData

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.