Package org.apache.lucene.index

Source Code of org.apache.lucene.index.FieldsWriter

package org.apache.lucene.index;

/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

import java.io.IOException;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.IOUtils;

import com.alimama.mdrill.fdtBlockCompress.FdtCompressIndexInput;
import com.alimama.mdrill.fdtBlockCompress.FdtCompressIndexOutput;

public class FieldsWriter {
    private static final Log LOG = LogFactory.getLog(FieldsWriter.class);

  static final int FIELD_IS_TOKENIZED = 1 << 0;
  static final int FIELD_IS_BINARY = 1 << 1;

  /** @deprecated Kept for backwards-compatibility with <3.0 indexes; will be removed in 4.0 */
  @Deprecated
  static final int FIELD_IS_COMPRESSED = 1 << 2;

  private static final int _NUMERIC_BIT_SHIFT = 3;
  static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;

  static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
  static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
  static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
  static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
  // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
  // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;

  // the next possible bits are: 1 << 6; 1 << 7
 
  // Original format
  static final int FORMAT = 0;

  // Changed strings to UTF8
  static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1;
 
  // Lucene 3.0: Removal of compressed fields
  static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;

  // Lucene 3.2: NumericFields are stored in binary format
  static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;

  // NOTE: if you introduce a new format, make it 1 higher
  // than the current one, and always change this if you
  // switch to a new format!
  static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
 
  private FieldInfos fieldInfos;

  // If null - we were supplied with streams, if notnull - we manage them ourselves
  private Directory directory;
  private String segment;
  private IndexOutput fieldsStream;
  private IndexOutput indexStream;
 
 

  FieldsWriter(Directory directory, String segment, FieldInfos fn) throws IOException {
    this.directory = directory;
    this.segment = segment;
    fieldInfos = fn;

    boolean success = false;
    try {

      IndexOutput fdt = directory.createOutput(IndexFileNames.segmentFileName(segment, IndexFileNames.FIELDS_EXTENSION));
      indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, IndexFileNames.FIELDS_INDEX_EXTENSION));

      if(FieldsWriterCompress.isFdtCompress()&&!(directory instanceof RAMDirectory))
      {
          indexStream.writeInt(FieldsWriterCompress.FORMAT_CURRENT);
          fieldsStream=new FdtCompressIndexOutput(fdt,1024*512);
          fieldsStream.writeInt(FieldsWriterCompress.FORMAT_CURRENT);

      }else{
          fdt.writeInt(FORMAT_CURRENT);
          indexStream.writeInt(FORMAT_CURRENT);
          fieldsStream=fdt;
          fieldsStream.writeInt(FieldsWriterCompress.FORMAT_CURRENT);
      }

      success = true;
    } finally {
      if (!success) {
        abort();
      }
    }
  }

  FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
    directory = null;
    segment = null;
    fieldInfos = fn;
    fieldsStream = fdt;
    indexStream = fdx;
  }

  void setFieldsStream(IndexOutput stream) {
    this.fieldsStream = stream;
  }

  // Writes the contents of buffer into the fields stream
  // and adds a new entry for this document into the index
  // stream.  This assumes the buffer was already written
  // in the correct fields format.
  void flushDocument(int numStoredFields, RAMOutputStream buffer) throws IOException {
    long pos=fieldsStream.getFilePointer();
//    LOG.info("flushDocument:"+pos+","+numStoredFields);
    indexStream.writeLong(pos);
    fieldsStream.writeVInt(numStoredFields);
    buffer.writeTo(fieldsStream);
  }

  void skipDocument() throws IOException {
    long pos=fieldsStream.getFilePointer();
//    LOG.info("skipDocument:"+pos);
    indexStream.writeLong(pos);
    fieldsStream.writeVInt(0);
  }

  void close() throws IOException {
    if (directory != null) {
      try {
        IOUtils.close(fieldsStream, indexStream);
      } finally {
        fieldsStream = indexStream = null;
      }
    }
  }

  void abort() {
    if (directory != null) {
      try {
        close();
      } catch (IOException ignored) {
      }
      try {
        directory.deleteFile(IndexFileNames.segmentFileName(segment, IndexFileNames.FIELDS_EXTENSION));
      } catch (IOException ignored) {
      }
      try {
        directory.deleteFile(IndexFileNames.segmentFileName(segment, IndexFileNames.FIELDS_INDEX_EXTENSION));
      } catch (IOException ignored) {
      }
    }
  }

  final void writeField(FieldInfo fi, Fieldable field) throws IOException {
      fieldsStream.writeVInt(fi.number);
    int bits = 0;
    if (field.isTokenized())
      bits |= FIELD_IS_TOKENIZED;
    if (field.isBinary())
      bits |= FIELD_IS_BINARY;
    if (field instanceof NumericField) {
      switch (((NumericField) field).getDataType()) {
        case INT:
          bits |= FIELD_IS_NUMERIC_INT; break;
        case LONG:
          bits |= FIELD_IS_NUMERIC_LONG; break;
        case FLOAT:
          bits |= FIELD_IS_NUMERIC_FLOAT; break;
        case DOUBLE:
          bits |= FIELD_IS_NUMERIC_DOUBLE; break;
        default:
          assert false : "Should never get here";
      }
    }
    fieldsStream.writeByte((byte) bits);

    if (field.isBinary()) {
      final byte[] data;
      final int len;
      final int offset;
      data = field.getBinaryValue();
      len = field.getBinaryLength();
      offset =  field.getBinaryOffset();

      fieldsStream.writeVInt(len);
      fieldsStream.writeBytes(data, offset, len);
    } else if (field instanceof NumericField) {
      final NumericField nf = (NumericField) field;
      final Number n = nf.getNumericValue();
      switch (nf.getDataType()) {
        case INT:
            fieldsStream.writeVVInt(n.intValue()); break;
        case LONG:
            fieldsStream.writeVVLong(n.longValue()); break;
        case FLOAT:
            fieldsStream.writeVVVInt(Float.floatToIntBits(n.floatValue())); break;
        case DOUBLE:
            fieldsStream.writeVVVLong(Double.doubleToLongBits(n.doubleValue())); break;
        default:
          assert false : "Should never get here";
      }
    } else {
  fieldsStream.writeString(field.stringValue());
    }
  }

  final void addRawDocuments(final IndexInput stream, long[] lengthsstart,long[] lengthsend, int numDocs) throws IOException {
    if(stream instanceof FdtCompressIndexInput)
    {
      FdtCompressIndexInput inputstream=(FdtCompressIndexInput)stream;
        for(int i=0;i<numDocs;i++) {
        long position = fieldsStream.getFilePointer();
          indexStream.writeLong(position);
//          LOG.info("addRawDocuments 1 "+position+","+lengthsstart[i]+","+lengthsend[i]);
          inputstream.writeToPos(fieldsStream,lengthsend[i]);
        }
      return ;
    }
   
    if(fieldsStream instanceof FdtCompressIndexOutput)
    {
        for(int i=0;i<numDocs;i++) {
          long position = fieldsStream.getFilePointer();
            indexStream.writeLong(position);
//            LOG.info("addRawDocuments 2 "+position+","+lengthsstart[i]+","+lengthsend[i]);
            long end=lengthsend[i];
            if(end==-1)
            {
              end=stream.length();
            }
          fieldsStream.copyBytes(stream, end-lengthsstart[i]);
        }
     
      return ;
    }
   
      long position = fieldsStream.getFilePointer();
      long start = position;
      for(int i=0;i<numDocs;i++) {
        indexStream.writeLong(position);
//        LOG.info("addRawDocuments 3 "+position+","+lengthsstart[i]+","+lengthsend[i]);

        long end=lengthsend[i];
        if(end==-1)
        {
          end=stream.length();
        }
        position += end-lengthsstart[i];
      }
      fieldsStream.copyBytes(stream, position-start);
    }

  final void addDocument(Document doc) throws IOException {
    long pos=fieldsStream.getFilePointer();
    indexStream.writeLong(pos);

//    LOG.info("addDocument "+pos);

    int storedCount = 0;
    List<Fieldable> fields = doc.getFields();
    for (Fieldable field : fields) {
      if (field.isStored())
          storedCount++;
    }
    fieldsStream.writeVInt(storedCount);

    for (Fieldable field : fields) {
      if (field.isStored())
      {
        writeField(fieldInfos.fieldInfo(field.name()), field);
      }
    }
   
  }
 

 
}
TOP

Related Classes of org.apache.lucene.index.FieldsWriter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.