Package org.apache.lucene.document

Examples of org.apache.lucene.document.Fieldable
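
Fieldable is the interface implemented by Field in Lucene 2.x and 3.x (it was removed in Lucene 4.0). It exposes a field's name, its value accessors (stringValue(), readerValue(), binaryValue(), tokenStreamValue()), and its index-time flags (isIndexed(), isTokenized(), isBinary(), isStored()). A minimal sketch, assuming a Lucene 3.x classpath; the field name and value are illustrative:

    Document doc = new Document();
    doc.add(new Field("title", "Lucene in Action", Field.Store.YES, Field.Index.ANALYZED));
    Fieldable f = doc.getFieldable("title");                  // the Field comes back through the Fieldable interface
    System.out.println(f.name() + " = " + f.stringValue());   // prints: title = Lucene in Action
    System.out.println("indexed=" + f.isIndexed() + ", binary=" + f.isBinary());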


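From a reindexing routine: the document is copied field by field, reader-backed and lazy-extractor values are replaced with empty readers so the copy can be indexed immediately, and the original is queued for full indexing later: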
         // mark the document as requiring reindexing
         copy.add(new Field(FieldNames.REINDEXING_REQUIRED, "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
         Iterator fields = doc.getFields().iterator();
         while (fields.hasNext())
         {
            Fieldable f = (Fieldable)fields.next();
            Fieldable field = null;
            Field.TermVector tv = getTermVectorParameter(f);
            Field.Store stored = getStoreParameter(f);
            Field.Index indexed = getIndexParameter(f);
            if (f instanceof LazyTextExtractorField || f.readerValue() != null)
            {
               // replace all readers with empty string reader
               field = new Field(f.name(), new StringReader(""), tv);
            }
            else if (f.stringValue() != null)
            {
               field = new Field(f.name(), f.stringValue(), stored, indexed, tv);
            }
            else if (f.isBinary())
            {
               field = new Field(f.name(), f.binaryValue(), stored);
            }
            if (field != null)
            {
               field.setOmitNorms(f.getOmitNorms());
               copy.add(field);
            }
         }
         // schedule the original document for later indexing
         Document existing = indexingQueue.addDocument(doc);


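The corresponding cleanup path: when a queued document is replaced, the old document's reader values are closed and its lazy extractor fields disposed (the closing braces and catch clause below are a plausible reconstruction of the snippet's truncated tail):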
      {
         public Object run()
         {
            for (Iterator it = old.getFields().iterator(); it.hasNext();)
            {
               Fieldable f = (Fieldable)it.next();
               try
               {
                  if (f.readerValue() != null)
                  {
                     f.readerValue().close();
                  }
                  else if (f instanceof LazyTextExtractorField)
                  {
                     LazyTextExtractorField field = (LazyTextExtractorField)f;
                     field.dispose();
                  }
               }
               catch (IOException e)
               {
                  // best-effort cleanup: ignore failures while closing reader values
               }
            }
            return null;
         }
      }

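A helper that reports whether every asynchronous text extractor attached to a document has finished, i.e. whether the document is ready for indexing: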
   public static boolean isDocumentReady(Document doc)
   {
      for (Iterator it = doc.getFields().iterator(); it.hasNext();)
      {
         Fieldable f = (Fieldable)it.next();
         if (f instanceof LazyTextExtractorField)
         {
            LazyTextExtractorField field = (LazyTextExtractorField)f;
            if (!field.isExtractorFinished())
            {
               // an extractor is still running: the document is not ready yet
               return false;
            }
         }
      }
      return true;
   }
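
A hypothetical usage sketch; the polling loop, interval, and writer are illustrative and not part of the original source:

    static void indexWhenReady(IndexWriter writer, Document doc) throws Exception {
        // illustrative back-off; real code would rather block on a notification
        while (!isDocumentReady(doc)) {
            Thread.sleep(50);
        }
        writer.addDocument(doc);
    }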

 
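Building a single-document in-memory index from a Document's fields (note this assumes every field carries a string value; stringValue() returns null for reader- or binary-valued fields):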
  private MemoryIndex createMemoryIndex(Document doc) {
    MemoryIndex index = new MemoryIndex();
    Iterator<Fieldable> iter = doc.getFields().iterator();
    while (iter.hasNext()) {
      Fieldable field = iter.next();
      index.addField(field.name(), field.stringValue(), analyzer);
    }
    return index;
  }
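
A MemoryIndex built this way can be queried in place. A minimal sketch, assuming Lucene 3.x; the query field name and query text are illustrative:

    MemoryIndex index = createMemoryIndex(doc);
    QueryParser parser = new QueryParser(Version.LUCENE_30, "contents", analyzer);
    float score = index.search(parser.parse("lucene"));   // parse() may throw ParseException
    if (score > 0.0f) {
        System.out.println("document matches, score=" + score);
    }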

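Mapping search hits back to rows of a decorated Swing table model through a stored row-number field (ROW_NUMBER is assumed to be a stored text field, so stringValue() is non-null):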
            //get the row number stored at the index
            //that number is the row number of the decorated
            //table model row that we are mapping to
            for (int t=0; t<hits.length; t++){
                Document document = searcher.doc(hits[t].doc);
                Fieldable field = document.getField(ROW_NUMBER);
                rowToModelIndex.add(Integer.valueOf(field.stringValue()));
            }
        } catch (Exception e){
            e.printStackTrace();
        }
    }


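From a test of lazy field loading: the first set of the SetBasedFieldSelector (fields to load eagerly) is empty, so only the fields named in lazyFields are loaded, and only on first access: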
        FieldSelector sel = new SetBasedFieldSelector(new HashSet<String>(), lazyFields);
        doc = reader.document(reader.maxDoc() - 1, sel);
        Fieldable[] fieldables = doc.getFieldables("bin1");
        assertNotNull(fieldables);
        assertEquals(1, fieldables.length);
        Fieldable fb1 = fieldables[0];
        assertTrue(fb1.isBinary());
        assertEquals(bin.length, fb1.getBinaryLength());
        data1 = fb1.getBinaryValue();
        assertEquals(bin.length, fb1.getBinaryLength());
        for (int i = 0; i < bin.length; i++) {
          assertEquals(bin[i], data1[i + fb1.getBinaryOffset()]);
        }
        reader.close();
        // force optimize

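For comparison, a minimal sketch mixing eager and lazy loading; the field names are illustrative:

        Set<String> eager = Collections.singleton("title");    // loaded immediately
        Set<String> lazy = Collections.singleton("bin1");      // loaded on first access
        FieldSelector sel = new SetBasedFieldSelector(eager, lazy);
        Document doc = reader.document(0, sel);                // fields in neither set are not loaded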

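From an in-memory index writer: every indexed field is tokenized into a list of Token objects, a pre-built TokenStream is honored when the field carries one, and non-stored fields are removed from the document afterwards: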
    Map<Fieldable, LinkedList<Token>> tokensByField = new LinkedHashMap<Fieldable, LinkedList<Token>>(20);

    // tokenize indexed fields.
    for (Iterator<Fieldable> it = document.getDocument().getFields().iterator(); it.hasNext();) {

      Fieldable field = it.next();

      FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());

      if (field.isIndexed()) {

        LinkedList<Token> tokens = new LinkedList<Token>();
        tokensByField.put(field, tokens);

        if (field.isTokenized()) {
          final TokenStream tokenStream;
          // todo readerValue(), binaryValue()
          if (field.tokenStreamValue() != null) {
            tokenStream = field.tokenStreamValue();
          } else {
            tokenStream = analyzer.reusableTokenStream(field.name(), new StringReader(field.stringValue()));
          }

          // reset the TokenStream to the first token         
          tokenStream.reset();

          while (tokenStream.incrementToken()) {
            // TODO: a simple workaround to keep working with Token instances; not very
            // efficient, but as far as I know this writer should be removed soon:
            final Token token = new Token();
            for (Iterator<AttributeImpl> atts = tokenStream.getAttributeImplsIterator(); atts.hasNext();) {
              final AttributeImpl att = atts.next();
              try {
                att.copyTo(token);
              } catch (Exception e) {
                // ignore unsupported attributes: copyTo can fail when a combined
                // AttributeImpl implements both the basic attributes supported by
                // Token and additional custom ones in a single class
              }
            }
            tokens.add(token); // the vector will be built on commit.
            fieldSetting.fieldLength++;
            if (fieldSetting.fieldLength > maxFieldLength) {
              break;
            }
          }
          tokenStream.end();
          tokenStream.close();
        } else {
          // untokenized
          String fieldVal = field.stringValue();
          Token token = new Token(0, fieldVal.length(), "untokenized");
          token.setEmpty().append(fieldVal);
          tokens.add(token);
          fieldSetting.fieldLength++;
        }
      }

      if (!field.isStored()) {
        it.remove();
      }
    }


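From Lucene's own document inversion code: each Fieldable is either indexed as a single token (un-tokenized fields) or run through a TokenStream obtained from the field itself or from the analyzer, while per-field position and offset state is updated: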
    final boolean doInvert = consumer.start(fields, count);

    for(int i=0;i<count;i++) {

      final Fieldable field = fields[i];

      // TODO FI: this should be "genericized" to querying
      // consumer if it wants to see this particular field
      // tokenized.
      if (field.isIndexed() && doInvert) {

        final boolean anyToken;
       
        if (fieldState.length > 0)
          fieldState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);

        if (!field.isTokenized()) {      // un-tokenized field
          String stringValue = field.stringValue();
          final int valueLength = stringValue.length();
          perThread.singleToken.reinit(stringValue, 0, valueLength);
          fieldState.attributeSource = perThread.singleToken;
          consumer.start(field);

          boolean success = false;
          try {
            consumer.add();
            success = true;
          } finally {
            if (!success)
              docState.docWriter.setAborting();
          }
          fieldState.offset += valueLength;
          fieldState.length++;
          fieldState.position++;
          anyToken = valueLength > 0;
        } else {                                  // tokenized field
          final TokenStream stream;
          final TokenStream streamValue = field.tokenStreamValue();

          if (streamValue != null)
            stream = streamValue;
          else {
            // the field does not have a TokenStream,
            // so we have to obtain one from the analyzer
            final Reader reader;        // find or make Reader
            final Reader readerValue = field.readerValue();

            if (readerValue != null)
              reader = readerValue;
            else {
              String stringValue = field.stringValue();
              if (stringValue == null)
                throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
              perThread.stringReader.init(stringValue);
              reader = perThread.stringReader;
            }
         
            // Tokenize field and add to postingTable
            stream = docState.analyzer.reusableTokenStream(fieldInfo.name, reader);
          }

          // reset the TokenStream to the first token
          stream.reset();

          final int startLength = fieldState.length;
         
          try {
            int offsetEnd = fieldState.offset-1;
           
            boolean hasMoreTokens = stream.incrementToken();

            fieldState.attributeSource = stream;

            OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);
           
            consumer.start(field);
           
            for(;;) {

              // If stream.incrementToken() below throws an exception
              // (fairly common, e.g. if the analyzer chokes on a
              // given document), it is non-aborting: this one
              // document will be marked as deleted but still
              // consumes a docID
             
              if (!hasMoreTokens) break;
             
              final int posIncr = posIncrAttribute.getPositionIncrement();
              fieldState.position += posIncr;
              if (fieldState.position > 0) {
                fieldState.position--;
              }

              if (posIncr == 0)
                fieldState.numOverlap++;

              boolean success = false;
              try {
                // If we hit an exception in here, we abort
                // all buffered documents since the last
                // flush, on the likelihood that the
                // internal state of the consumer is now
                // corrupt and should not be flushed to a
                // new segment:
                consumer.add();
                success = true;
              } finally {
                if (!success)
                  docState.docWriter.setAborting();
              }
              fieldState.position++;
              offsetEnd = fieldState.offset + offsetAttribute.endOffset();
              if (++fieldState.length >= maxFieldLength) {
                if (docState.infoStream != null)
                  docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens");
                break;
              }

              hasMoreTokens = stream.incrementToken();
            }
            // trigger streams to perform end-of-stream operations
            stream.end();
           
            fieldState.offset += offsetAttribute.endOffset();
            anyToken = fieldState.length > startLength;
          } finally {
            stream.close();
          }
        }

        if (anyToken)
          fieldState.offset += docState.analyzer.getOffsetGap(field);
        fieldState.boost *= field.getBoost();
      }

      // LUCENE-2387: don't hang onto the field, so GC can
      // reclaim
      fields[i] = null;

    iw.close();
  }

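A test helper that adds a document with id, text, int, and float fields, all with norms omitted; the field values encode the document's expected score (see the inline comments):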
  private void addDoc(IndexWriter iw, int i) throws Exception {
    Document d = new Document();
    Fieldable f;
    int scoreAndID = i+1;
   
    f = new Field(ID_FIELD,id2String(scoreAndID),Field.Store.YES,Field.Index.NOT_ANALYZED); // for debug purposes
    f.setOmitNorms(true);
    d.add(f);
   
    f = new Field(TEXT_FIELD,"text of doc"+scoreAndID+textLine(i),Field.Store.NO,Field.Index.ANALYZED); // for regular search
    f.setOmitNorms(true);
    d.add(f);
   
    f = new Field(INT_FIELD,""+scoreAndID,Field.Store.NO,Field.Index.NOT_ANALYZED); // for function scoring
    f.setOmitNorms(true);
    d.add(f);
   
    f = new Field(FLOAT_FIELD,scoreAndID+".000",Field.Store.NO,Field.Index.NOT_ANALYZED); // for function scoring
    f.setOmitNorms(true);
    d.add(f);

    iw.addDocument(d);
    log("added: "+d);
  }
