Package org.apache.lucene.document

Examples of org.apache.lucene.document.Document


      else if(i==2) make="4runner";
      else if(i%2 ==0) make="rav4";
      else make = "prius";
     
      String ID = Integer.toString(i);
      Document d=new Document();
      d.add(new Field("id",ID,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("color",color,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("make",make,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      dataList.add(d);
    }
    return dataList.toArray(new Document[dataList.size()]);
}
View Full Code Here


     
      for (int i=0;i<numcars;++i){
        if (i!=0 && i%1000==0){
          System.out.println(i+" cars indexed.");
        }
        Document doc=new Document();
        int n=rand.nextInt(10);
        if (n==0){
          makeCar(doc,cars[rand.nextInt(cars.length)]);         
        }
        else{
          Document srcDoc=docCache[rand.nextInt(carcount)];
          makeCar(doc,srcDoc);         
        }
       
        populateDocument(doc,null);                               
        handler.handleDocument(doc);       
View Full Code Here

  public Document next() throws IOException{
    while(_reader.isDeleted(curr_docid) && curr_docid<maxdoc){
      curr_docid++;
    }
    if (curr_docid<maxdoc){
      Document doc=_reader.document(curr_docid);
      curr_docid++;
      return doc;
    }
    else{
      return null;
View Full Code Here

   */
  @Override
  public Document getHitDocument(int index) throws RegainException {

    try {
      Document currDoc = (Document) lazyHitList.get(index);
      // The document is empty, so it's created by the factory. Replace it with the real one
      // at this position
      if (currDoc.getFields().isEmpty()) {
        lazyHitList.set(index, mIndexSearcher.doc(hitScoreDocs[index].doc));
      }
    } catch (Exception ex) {
      throw new RegainException("Error while accessing index", ex);
    }
View Full Code Here

   */
  @Override
  public void shortenSummary(int index) throws RegainException {

    try {
      Document document = getHitDocument(index);
      byte[] compressedFieldValue = document.getBinaryValue("summary");
      String text = null;
      if (compressedFieldValue != null) {
        text = CompressionTools.decompressString(compressedFieldValue);
      }

      if (text != null) {
        // Overwrite the content with a shortened summary
        String resSummary = RegainToolkit.createSummaryFromContent(text, 200);
        document.removeField("summary");
        if (resSummary != null) {
          document.add(new Field("summary", resSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("summary", CompressionTools.compressString(resSummary), Field.Store.YES));
          // write back the transformed document
          setHitDocument(index, document);
        }
      }
    } catch (DataFormatException dataFormatException) {
View Full Code Here

      // We transform this summary into
      // a) a summary matching the search terms (highlighting)
      // b) a shortened summary (200 characters)
//      int docId = hitScoreDocs[index].doc;

      Document document = getHitDocument(index);
      byte[] compressedFieldValue = document.getBinaryValue("summary");
      String text = null;
      if (compressedFieldValue != null) {
        text = CompressionTools.decompressString(compressedFieldValue);
      }

      if (text != null) {
        // Overwrite the content with a shortened summary
        String resSummary = RegainToolkit.createSummaryFromContent(text, 200);
        document.removeField("summary");
        if (resSummary != null) {
          //System.out.println("resSummary " + resSummary);
          document.add(new Field("summary", resSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("summary", CompressionTools.compressString(resSummary), Field.Store.YES));

        }

        String resHighlSummary = null;
        // Remove 'html', this works the same way as PageResponse.printNoHTML()
        text = RegainToolkit.replace(text, "<", "&lt;");
        text = RegainToolkit.replace(text, ">", "&gt;");

        TokenStream tokenStream = mAnalyzer.tokenStream("content",
                new StringReader(text));
        // Get the 3 best fragments and separate them with a " ... "
        resHighlSummary = highlighter.getBestFragments(tokenStream, text, 3, " ... ");

        if (resHighlSummary != null) {
          //System.out.println("Highlighted summary: " + resHighlSummary);
          // write the result back to the document in a new field
          document.add(new Field("highlightedSummary", resHighlSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("highlightedSummary", CompressionTools.compressString(resHighlSummary), Field.Store.YES));
        }
      }
      // Highlight the title
      text = document.get("title");
      String resHighlTitle = null;
      if (text != null) {
        TokenStream tokenStream = mAnalyzer.tokenStream("content",
                new StringReader(text));
        // Get the best fragment
        resHighlTitle = highlighter.getBestFragment(tokenStream, text);
      }

      if (resHighlTitle != null) {
        // write the result back to the document in a new field
        //System.out.println("Highlighted title: " + resHighlTitle);
        document.add(new Field("highlightedTitle", resHighlTitle,
                Field.Store.YES, Field.Index.NOT_ANALYZED));

      }
      // write back the transformed document
      setHitDocument(index, document);
View Full Code Here

    format.setMinimumFractionDigits(2);
    formatterHash.put("price",format);
  }
  private static Document makeDocument(Properties prop){

    Document doc=new Document();
    Enumeration nameIter=prop.propertyNames();
    while(nameIter.hasMoreElements()){
      String name=(String)nameIter.nextElement();
      NumberFormat format=formatterHash.get(name);
      String val=prop.getProperty(name);
     
      String[] vals=val.split(",");
      for (int i=0;i<vals.length;++i){
        if (vals[i]!=null && vals[i].length()>0){
          if (format!=null){
            vals[i]=format.format(Double.parseDouble(vals[i]));
          }
          doc.add(new Field(name,vals[i],Field.Store.NO,Field.Index.NOT_ANALYZED,Field.TermVector.NO));
        }
      } 
    }
   
    /*String[] tags=tagsMaker.getTags(prop);
View Full Code Here

        line=reader.readLine();
        if (line==null){
          break;
        }
        if ("<EOD>".equals(line)){    //new record
          Document doc=makeDocument(prop);
          populateDocument(doc,null);
          handler.handleDocument(doc);
          prop=new Properties();
        }
        else{
View Full Code Here

      Queue<Request> batchWork = new LinkedList<Request>();
      boolean cutOff = false;
      boolean isFromTake = true;
      Request req = inputQueue.take();   
      while (req != null && cutOff == false && batchWork.size() < MAX_WORK_SIZE) {       
        Document doc = req.doc;
        if(doc != null){
          if(POISON_DOC.equals(doc) == true){
            cutOff = true;           
          }else{
            String md5Url = doc.get(DocumentCreator.FIELD_URL_MD5);
            if(md5Url != null){
              if(seenMD5Url.contains(md5Url) == true){
                cutOff = true;
              }else{
                seenMD5Url.add(md5Url);
View Full Code Here

          Request aReq = workQueue.poll();
          if(aReq != null && aReq.type != RST){
            writer = createIndexWriter();
          }
          while (aReq != null) {
            Document doc = aReq.doc;
            if (doc != null && POISON_DOC.equals(doc)) {
              logger.debug("Terminate UpdateIndexWorker.");
              stopRunning = true;
            } else if (aReq.type == RST) {
              logger.debug("===================================> Do RESET.");
View Full Code Here

TOP

Related Classes of org.apache.lucene.document.Document

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.