Package org.apache.xindice.core.indexer

Source Code of org.apache.xindice.core.indexer.ValueIndexer

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $Id: ValueIndexer.java 519202 2007-03-17 01:34:14Z vgritsenko $
*/

package org.apache.xindice.core.indexer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xindice.core.Collection;
import org.apache.xindice.core.DBException;
import org.apache.xindice.core.data.Key;
import org.apache.xindice.core.data.Value;
import org.apache.xindice.core.filer.BTree;
import org.apache.xindice.core.filer.BTreeCallback;
import org.apache.xindice.core.filer.BTreeCorruptException;
import org.apache.xindice.core.filer.BTreeNotFoundException;
import org.apache.xindice.core.filer.Paged;
import org.apache.xindice.core.query.QueryEngine;
import org.apache.xindice.util.Configuration;
import org.apache.xindice.xml.SymbolTable;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
* ValueIndexer is a basic implementation of the Indexer interface.
* It is used for maintaining element and element@attribute value
* indexes.
*
* @version $Revision: 519202 $, $Date: 2007-03-16 21:34:14 -0400 (Fri, 16 Mar 2007) $
*/
public final class ValueIndexer extends BTree implements Indexer {

    private static final Log log = LogFactory.getLog(ValueIndexer.class);

    private static final IndexMatch[] EmptyMatches = new IndexMatch[0];
    private static final Value EmptyValue = new Value(new byte[0]);

    private static final long MATCH_INFO = -1000;

    private static final String NAME = "name";
    private static final String PATTERN = "pattern";
    private static final String TYPE = "type";

    private static final int STRING = 0;
    private static final int TRIMMED = 1;
    private static final int INTEGER = 2;
    private static final int FLOAT = 3;
    private static final int BYTE = 4;
    private static final int CHAR = 5;
    private static final int BOOLEAN = 6;

    private static final int[] sizes = {-1, -1, 8, 8, 1, 2, 1};

    private static final String STRING_VAL = "string";
    private static final String TRIMMED_VAL = "trimmed";
    private static final String SHORT_VAL = "short";
    private static final String INT_VAL = "int";
    private static final String LONG_VAL = "long";
    private static final String FLOAT_VAL = "float";
    private static final String DOUBLE_VAL = "double";
    private static final String BYTE_VAL = "byte";
    private static final String CHAR_VAL = "char";
    private static final String BOOLEAN_VAL = "boolean";

    private Collection collection;
    private SymbolTable symbols;

    private String name;
    private String pattern;
    private int type;
    private int typeSize = 32;
    private boolean wildcard;


    public ValueIndexer() {
        super();
    }

    /**
     * Override createFileHeader - set page size to 1024
     */
    protected Paged.FileHeader createFileHeader() {
        Paged.FileHeader header = super.createFileHeader();
        header.setPageSize(1024);
        return header;
    }

    public void setConfig(Configuration config) {
        super.setConfig(config);
        try {
            name = config.getAttribute(NAME);

            pattern = config.getAttribute(PATTERN);
            wildcard = pattern.indexOf('*') != -1;

            // Determine the Index Type
            String tv = config.getAttribute(TYPE, STRING_VAL).toLowerCase();
            if (tv.equals(STRING_VAL)) {
                type = STRING;
            } else if (tv.equals(TRIMMED_VAL)) {
                type = TRIMMED;
            } else if (tv.equals(SHORT_VAL)) {
                type = INTEGER;
            } else if (tv.equals(INT_VAL)) {
                type = INTEGER;
            } else if (tv.equals(LONG_VAL)) {
                type = INTEGER;
            } else if (tv.equals(FLOAT_VAL)) {
                type = FLOAT;
            } else if (tv.equals(DOUBLE_VAL)) {
                type = FLOAT;
            } else if (tv.equals(BYTE_VAL)) {
                type = BYTE;
            } else if (tv.equals(CHAR_VAL)) {
                type = CHAR;
            } else if (tv.equals(BOOLEAN_VAL)) {
                type = BOOLEAN;
            } else {
                if (pattern.indexOf('@') != -1) {
                    type = STRING;
                } else {
                    type = TRIMMED;
                }
            }

            typeSize = sizes[type];

            setLocation(name);
        } catch (Exception e) {
            if (log.isWarnEnabled()) {
                log.warn("ignored exception", e);
            }
        }
    }

    public String getName() {
        return name;
    }

    public void setLocation(String location) {
        setFile(new File(collection.getCollectionRoot(), location + ".idx"));
    }

    public void setCollection(Collection collection) {
        try {
            this.collection = collection;
            symbols = collection.getSymbols();
        } catch (Exception e) {
            if (log.isWarnEnabled()) {
                log.warn("ignored exception", e);
            }
        }
    }

    public String getIndexStyle() {
        return STYLE_NODEVALUE;
    }

    public String getPattern() {
        return pattern;
    }

    /**
     * Creates new Value object that depends on string parameter and a type of indexer.
     * The idea here is that any value of any type should be transformed to a byte array
     * in such a way that two Values of the same type are comparable. Value objects are
     * compared by comparing their data arrays byte-by-byte, starting from byte with
     * index 0. Also, data arrays will behave as array of <b>unsigned</b> bytes with
     * values ranging from 0 to 255.
     *
     * @param value
     * @return new Value object that represents specific value of this indexer type
     */
    public Value getTypedValue(String value) {
        if (type != STRING && type != TRIMMED) {
            value = value.trim();

            if (value.length() == 0) {
                return EmptyValue;
            }

            byte[] b = new byte[typeSize];
            try {
                switch (type) {
                    case INTEGER:
                        /*
                            Generally, two integers can be compared byte-by-byte, returning correct results,
                            except negative numbers which will be always bigger than positive numbers.
                            To solve this, change the value to be unsigned. Number range changes from
                            Long.MIN_VALUE / Long.MAX_VALUE to 0 / Long.MAX_VALUE - Long.MIN_VALUE.
                            This is done by flipping the first bit of the byte with index 0.
                        */
                        long l = Long.parseLong(value);
                        b[0] = (byte) ((l >>> 56) & 0xFF);
                        b[1] = (byte) ((l >>> 48) & 0xFF);
                        b[2] = (byte) ((l >>> 40) & 0xFF);
                        b[3] = (byte) ((l >>> 32) & 0xFF);
                        b[4] = (byte) ((l >>> 24) & 0xFF);
                        b[5] = (byte) ((l >>> 16) & 0xFF);
                        b[6] = (byte) ((l >>> 8) & 0xFF);
                        b[7] = (byte) (l & 0xFF);
                        b[0] = (byte) (b[0] ^ 0x80);
                        break;

                    case FLOAT:
                        /*
                            Float/Double number are stored according to IEEE standard 754. In short,
                            float numbers have the folloing format: 1 bit to indicate the sign of
                            the number, 8 bits for exponent, 23 bits for mantissa, and double
                            numbers have the following format: 1 bit to indicate the sign of
                            the number, 11 bits for exponent, 52 bits for mantissa. Both float and
                            double are normalized, so they can be compared byte-by-byte, except
                            that comparing two negative numbers or two number with different signs
                            will return incorrect results.
                            This is solved by changing number sign (is is stored in the first bit)
                            and flipping all the bits for negative numbers.
                        */
                        double d = Double.parseDouble(value);
                        long bits = Double.doubleToLongBits(d);

                        b[0] = (byte) ((bits >>> 56) & 0xFF);
                        if ((b[0] & 0xFF) != 0) {
                            // negative numbers
                            b[0] ^= 0xFF;
                            b[1] = (byte) ((bits >>> 48) & 0xFF ^ 0xFF);
                            b[2] = (byte) ((bits >>> 40) & 0xFF ^ 0xFF);
                            b[3] = (byte) ((bits >>> 32) & 0xFF ^ 0xFF);
                            b[4] = (byte) ((bits >>> 24) & 0xFF ^ 0xFF);
                            b[5] = (byte) ((bits >>> 16) & 0xFF ^ 0xFF);
                            b[6] = (byte) ((bits >>> 8) & 0xFF ^ 0xFF);
                            b[7] = (byte) (bits & 0xFF ^ 0xFF);
                        } else {
                            b[0] ^= 0x80;
                            b[1] = (byte) ((bits >>> 48) & 0xFF);
                            b[2] = (byte) ((bits >>> 40) & 0xFF);
                            b[3] = (byte) ((bits >>> 32) & 0xFF);
                            b[4] = (byte) ((bits >>> 24) & 0xFF);
                            b[5] = (byte) ((bits >>> 16) & 0xFF);
                            b[6] = (byte) ((bits >>> 8) & 0xFF);
                            b[7] = (byte) (bits & 0xFF);
                        }

                        break;

                    case BYTE:
                        b[0] = Byte.parseByte(value);
                        b[0] = (byte) (b[0] ^ 0x80);
                        break;

                    case CHAR:
                        char c = value.charAt(0);
                        b[0] = (byte) ((c >>> 8) & 0xFF);
                        b[1] = (byte) ( c        & 0xFF);
                        break;

                    case BOOLEAN:
                        if ("[true][yes][1][y][on]".indexOf("[" + value.toLowerCase() + "]") != -1) {
                            b[0] = 1;
                        } else if ("[false][no][0][n][off]".indexOf("[" + value.toLowerCase() + "]") != -1) {
                            b[0] = 0;
                        } else {
                            return EmptyValue;
                        }
                        break;

                    default:
                        if (log.isWarnEnabled()) {
                            log.warn("invalid type : " + type);
                        }
                }

                return new Value(b);

            } catch (Exception e) {
                return EmptyValue;
            }
        }

        if (type == TRIMMED) {
            value = QueryEngine.normalizeString(value);
        }

        return new Value(value);
    }

    private Value getCombinedValue(Key key, int pos, int len, short elemID, short attrID) {
        Value result;
        try {
            int l = key.getLength();
            byte[] b = new byte[l + 13];

            // Write the key
            key.copyTo(b, 0, l);
            b[l] = 0;

            // Write the pos
            b[l + 1] = (byte) ((pos >>> 24) & 0xFF);
            b[l + 2] = (byte) ((pos >>> 16) & 0xFF);
            b[l + 3] = (byte) ((pos >>>  8) & 0xFF);
            b[l + 4] = (byte) ( pos         & 0xFF);

            // Write the len
            b[l + 5] = (byte) ((len >>> 24) & 0xFF);
            b[l + 6] = (byte) ((len >>> 16) & 0xFF);
            b[l + 7] = (byte) ((len >>>  8) & 0xFF);
            b[l + 8] = (byte) ( len         & 0xFF);

            // Write the elemID
            b[l + 9] = (byte) ((elemID >>> 8) & 0xFF);
            b[l + 10] = (byte) ( elemID       & 0xFF);

            // Write the attrID
            b[l + 11] = (byte) ((attrID >>> 8) & 0xFF);
            b[l + 12] = (byte) ( attrID        & 0xFF);

            result = new Value(b);
        } catch (Exception e) {
            result = null; // This will never happen
        }
        return result;
    }

    private IndexMatch getIndexMatch(Value v) {
        byte[] b = v.getData();
        int l = b.length - 13;
        Key key = new Key(b, 0, b.length - 13);

        int pos = ((b[l + 1] << 24) | (b[l + 2] << 16) | (b[l + 3] << 8) | b[l + 4]);
        int len = ((b[l + 5] << 24) | (b[l + 6] << 16) | (b[l + 7] << 8) | b[l + 8]);
        short elemID = (short) ((b[l + 9] << 8) | b[l + 10]);
        short attrID = (short) ((b[l + 11] << 8) | b[l + 12]);

        return new IndexMatch(key, pos, len, elemID, attrID);
    }

    public void remove(String value, Key key, int pos, int len, short elemID, short attrID) throws DBException {
        Value v = getTypedValue(value);
        if (type != STRING && type != TRIMMED && v.getLength() == 0) {
            return;
        }

        try {
            BTreeRootInfo root = findBTreeRoot(v);
            Value cv = getCombinedValue(key, pos, len, elemID, attrID);
            removeValue(root, cv);
        } catch (DBException e) {
            throw e;
        } catch (Exception e) {
            if (log.isWarnEnabled()) {
                log.warn("ignored exception", e);
            }
        }
    }

    public void add(String value, Key key, int pos, int len, short elemID, short attrID) throws DBException {
        Value v = getTypedValue(value);
        if (type != STRING && type != TRIMMED && v.getLength() == 0) {
            return;
        }

        try {
            BTreeRootInfo root;

            try {
                root = findBTreeRoot(v);
            } catch (BTreeNotFoundException e) {
                root = createBTreeRoot(v);
            }

            Value cv = getCombinedValue(key, pos, len, elemID, attrID);
            addValue(root, cv, MATCH_INFO);
        } catch (DBException e) {
            throw e;
        } catch (IOException e) {
            throw new BTreeCorruptException("Corruption detected on add", e);
        } catch (Exception e) {
            if (log.isWarnEnabled()) {
                log.warn("ignored exception", e);
            }
        }
    }

    public IndexMatch[] queryMatches(final IndexQuery query) throws DBException {
        // Pre-process the value-set for typing and trimming
        if (type != STRING) {
            Value[] vals = query.getValues();
            for (int i = 0; i < vals.length; i++) {
                vals[i] = getTypedValue(vals[i].toString());
            }
        }

        // Now issue the query
        final List results = new ArrayList();

        try {
            query(query, new BTreeCallback() {
                public boolean indexInfo(Value value, long pos) {
                    try {
                        if (pos == MATCH_INFO) {
                            IndexMatch match = getIndexMatch(value);
                            if (!wildcard)
                                results.add(match);
                            else {
                                IndexPattern pt = new IndexPattern(symbols, match.getElement(), match.getAttribute());
                                if (pt.getMatchLevel(query.getPattern()) > 0) {
                                    results.add(match);
                                }
                            }
                        } else {
                            BTreeRootInfo root = new BTreeRootInfo(value, pos);
                            query(root, null, this);
                        }
                        return true;
                    } catch (Exception e) {
                        if (log.isWarnEnabled()) {
                            log.warn("ignored exception", e);
                        }
                    }
                    return true;
                }
            });
        } catch (IOException e) {
            throw new BTreeCorruptException("Corruption detected on query", e);
        } catch (Exception e) {
            if (log.isWarnEnabled()) {
                log.warn("ignored exception", e);
            }
        }

        return (IndexMatch[]) results.toArray(EmptyMatches);
    }
}
TOP

Related Classes of org.apache.xindice.core.indexer.ValueIndexer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.