package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.StringHelper;
import java.io.IOException;
import java.util.*;
/** Access to the Fieldable Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Fieldable Info file. Objects
* of this class are thread-safe for multiple readers, but only one thread can
* be adding documents at a time, with no other reader or writer threads
* accessing this object.
*/
public final class FieldInfos {
// Used internally (ie not written to *.fnm files) for pre-2.9 files
public static final int FORMAT_PRE = -1;
// First used in 2.9; prior to 2.9 there was no format header
public static final int FORMAT_START = -2;
// First used in 3.4: omit only positional information
public static final int FORMAT_OMIT_POSITIONS = -3;
// whenever you add a new format, make it 1 smaller (negative version logic)!
static final int CURRENT_FORMAT = FORMAT_OMIT_POSITIONS;
static final byte IS_INDEXED = 0x1;
static final byte STORE_TERMVECTOR = 0x2;
static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
static final byte OMIT_NORMS = 0x10;
static final byte STORE_PAYLOADS = 0x20;
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
static final byte OMIT_POSITIONS = -128;
private final ArrayList<FieldInfo> byNumber = new ArrayList<FieldInfo>();
private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
private int format;
FieldInfos() { }
/**
* Construct a FieldInfos object using the directory and the name of the file
* IndexInput
* @param d The directory to open the IndexInput from
* @param name The name of the file to open the IndexInput from in the Directory
* @throws IOException
*/
FieldInfos(Directory d, String name) throws IOException {
IndexInput input = d.openInput(name);
try {
try {
read(input, name);
} catch (IOException ioe) {
if (format == FORMAT_PRE) {
// LUCENE-1623: FORMAT_PRE (before there was a
// format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
// encoding; retry with input set to pre-utf8
input.seek(0);
input.setModifiedUTF8StringsMode();
byNumber.clear();
byName.clear();
try {
read(input, name);
} catch (Throwable t) {
// Ignore any new exception & throw original IOE
throw ioe;
}
} else {
// The IOException cannot be caused by
// LUCENE-1623, so re-throw it
throw ioe;
}
}
} finally {
input.close();
}
}
/**
* Returns a deep clone of this FieldInfos instance.
*/
@Override
synchronized public Object clone() {
FieldInfos fis = new FieldInfos();
final int numField = byNumber.size();
for(int i=0;i<numField;i++) {
FieldInfo fi = (FieldInfo) ( byNumber.get(i)).clone();
fis.byNumber.add(fi);
fis.byName.put(fi.name, fi);
}
return fis;
}
/** Adds field info for a Document. */
synchronized public void add(Document doc) {
List<Fieldable> fields = doc.getFields();
for (Fieldable field : fields) {
add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getIndexOptions());
}
}
/** Returns true if any fields do not omitTermFreqAndPositions */
boolean hasProx() {
final int numFields = byNumber.size();
for(int i=0;i<numFields;i++) {
final FieldInfo fi = fieldInfo(i);
if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return true;
}
}
return false;
}
/**
* Add fields that are indexed. Whether they have termvectors has to be specified.
*
* @param names The names of the fields
* @param storeTermVectors Whether the fields store term vectors or not
* @param storePositionWithTermVector true if positions should be stored.
* @param storeOffsetWithTermVector true if offsets should be stored
*/
synchronized public void addIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector) {
for (String name : names) {
add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
}
}
/**
* Assumes the fields are not storing term vectors.
*
* @param names The names of the fields
* @param isIndexed Whether the fields are indexed or not
*
* @see #add(String, boolean)
*/
synchronized public void add(Collection<String> names, boolean isIndexed) {
for (String name : names) {
add(name, isIndexed);
}
}
/**
* Calls 5 parameter add with false for all TermVector parameters.
*
* @param name The name of the Fieldable
* @param isIndexed true if the field is indexed
* @see #add(String, boolean, boolean, boolean, boolean)
*/
synchronized public void add(String name, boolean isIndexed) {
add(name, isIndexed, false, false, false, false);
}
/**
* Calls 5 parameter add with false for term vector positions and offsets.
*
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
*/
synchronized public void add(String name, boolean isIndexed, boolean storeTermVector){
add(name, isIndexed, storeTermVector, false, false, false);
}
/** If the field is not yet known, adds it. If it is known, checks to make
* sure that the isIndexed flag is the same as was given previously for this
* field. If not - marks it as being indexed. Same goes for the TermVector
* parameters.
*
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
* @param storePositionWithTermVector true if the term vector with positions should be stored
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
*/
synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
}
/** If the field is not yet known, adds it. If it is known, checks to make
* sure that the isIndexed flag is the same as was given previously for this
* field. If not - marks it as being indexed. Same goes for the TermVector
* parameters.
*
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
* @param storePositionWithTermVector true if the term vector with positions should be stored
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
* @param omitNorms true if the norms for the indexed field should be omitted
*/
synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
add(name, isIndexed, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
}
/** If the field is not yet known, adds it. If it is known, checks to make
* sure that the isIndexed flag is the same as was given previously for this
* field. If not - marks it as being indexed. Same goes for the TermVector
* parameters.
*
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
* @param storePositionWithTermVector true if the term vector with positions should be stored
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
* @param omitNorms true if the norms for the indexed field should be omitted
* @param storePayloads true if payloads should be stored for this field
* @param indexOptions if term freqs should be omitted for this field
*/
synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
FieldInfo fi = fieldInfo(name);
if (fi == null) {
return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
} else {
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
}
assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
return fi;
}
synchronized public FieldInfo add(FieldInfo fi) {
return add(fi.name, fi.isIndexed, fi.storeTermVector,
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
fi.omitNorms, fi.storePayloads,
fi.indexOptions);
}
private FieldInfo addInternal(String name, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
name = StringHelper.intern(name);
FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
byNumber.add(fi);
byName.put(name, fi);
return fi;
}
public int fieldNumber(String fieldName) {
FieldInfo fi = fieldInfo(fieldName);
return (fi != null) ? fi.number : -1;
}
public FieldInfo fieldInfo(String fieldName) {
return byName.get(fieldName);
}
/**
* Return the fieldName identified by its number.
*
* @param fieldNumber
* @return the fieldName or an empty string when the field
* with the given number doesn't exist.
*/
public String fieldName(int fieldNumber) {
FieldInfo fi = fieldInfo(fieldNumber);
return (fi != null) ? fi.name : "";
}
/**
* Return the fieldinfo object referenced by the fieldNumber.
* @param fieldNumber
* @return the FieldInfo object or null when the given fieldNumber
* doesn't exist.
*/
public FieldInfo fieldInfo(int fieldNumber) {
return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null;
}
public int size() {
return byNumber.size();
}
public boolean hasVectors() {
boolean hasVectors = false;
for (int i = 0; i < size(); i++) {
if (fieldInfo(i).storeTermVector) {
hasVectors = true;
break;
}
}
return hasVectors;
}
public void write(Directory d, String name) throws IOException {
IndexOutput output = d.createOutput(name);
try {
write(output);
} finally {
output.close();
}
}
public void write(IndexOutput output) throws IOException {
output.writeVInt(CURRENT_FORMAT);
output.writeVInt(size());
for (int i = 0; i < size(); i++) {
FieldInfo fi = fieldInfo(i);
assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
byte bits = 0x0;
if (fi.isIndexed) bits |= IS_INDEXED;
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
if (fi.omitNorms) bits |= OMIT_NORMS;
if (fi.storePayloads) bits |= STORE_PAYLOADS;
if (fi.indexOptions == IndexOptions.DOCS_ONLY)
bits |= OMIT_TERM_FREQ_AND_POSITIONS;
else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS)
bits |= OMIT_POSITIONS;
output.writeString(fi.name);
output.writeByte(bits);
}
}
private void read(IndexInput input, String fileName) throws IOException {
int firstInt = input.readVInt();
if (firstInt < 0) {
// This is a real format
format = firstInt;
} else {
format = FORMAT_PRE;
}
if (format != FORMAT_PRE && format != FORMAT_START && format != FORMAT_OMIT_POSITIONS) {
throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
}
int size;
if (format == FORMAT_PRE) {
size = firstInt;
} else {
size = input.readVInt(); //read in the size
}
for (int i = 0; i < size; i++) {
String name = StringHelper.intern(input.readString());
byte bits = input.readByte();
boolean isIndexed = (bits & IS_INDEXED) != 0;
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
boolean omitNorms = (bits & OMIT_NORMS) != 0;
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
final IndexOptions indexOptions;
if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_ONLY;
} else if ((bits & OMIT_POSITIONS) != 0) {
if (format <= FORMAT_OMIT_POSITIONS) {
indexOptions = IndexOptions.DOCS_AND_FREQS;
} else {
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
}
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
// LUCENE-3027: past indices were able to write
// storePayloads=true when omitTFAP is also true,
// which is invalid. We correct that, here:
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
storePayloads = false;
}
addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
}
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
}
}