Package com.thinkaurelius.titan.diskstorage.lucene

Source Code of com.thinkaurelius.titan.diskstorage.lucene.LuceneIndex$Transaction

package com.thinkaurelius.titan.diskstorage.lucene;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Shape;
import com.thinkaurelius.titan.core.Mapping;
import com.thinkaurelius.titan.core.Order;
import com.thinkaurelius.titan.core.attribute.*;
import com.thinkaurelius.titan.diskstorage.PermanentStorageException;
import com.thinkaurelius.titan.diskstorage.StorageException;
import com.thinkaurelius.titan.diskstorage.TemporaryStorageException;
import com.thinkaurelius.titan.diskstorage.TransactionHandle;
import com.thinkaurelius.titan.diskstorage.indexing.*;
import com.thinkaurelius.titan.graphdb.configuration.GraphDatabaseConfiguration;
import com.thinkaurelius.titan.graphdb.database.serialize.AttributeUtil;
import com.thinkaurelius.titan.graphdb.query.TitanPredicate;
import com.thinkaurelius.titan.graphdb.query.condition.*;
import com.thinkaurelius.titan.util.system.IOUtils;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queries.BooleanFilter;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.vector.PointVectorStrategy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantLock;

/**
* @author Matthias Broecheler (me@matthiasb.com)
*/

public class LuceneIndex implements IndexProvider {

    private Logger log = LoggerFactory.getLogger(LuceneIndex.class);


    private static final String DOCID = "_____elementid";
    private static final String GEOID = "_____geo";
    private static final int MAX_STRING_FIELD_LEN = 256;

    private static final Version LUCENE_VERSION = Version.LUCENE_41;

    private static final int GEO_MAX_LEVELS = 11;

    private final Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);

    private final Map<String, IndexWriter> writers = new HashMap<String, IndexWriter>(4);
    private final ReentrantLock writerLock = new ReentrantLock();

    private Map<String, SpatialStrategy> spatial = new ConcurrentHashMap<String, SpatialStrategy>(12);
    private SpatialContext ctx = SpatialContext.GEO;

    private final String basePath;

    public LuceneIndex(Configuration config) {
        String dir = config.getString(GraphDatabaseConfiguration.STORAGE_DIRECTORY_KEY, "");
        Preconditions.checkArgument(StringUtils.isNotBlank(dir), "Need to configure directory for lucene");
        File directory = new File(dir);
        if (!directory.exists()) directory.mkdirs();
        if (!directory.exists() || !directory.isDirectory() || !directory.canWrite())
            throw new IllegalArgumentException("Cannot access or write to directory: " + dir);
        basePath = directory.getAbsolutePath();
        log.debug("Configured Lucene to use base directory [{}]", basePath);
    }

    private Directory getStoreDirectory(String store) throws StorageException {
        Preconditions.checkArgument(StringUtils.isAlphanumeric(store), "Invalid store name: %s", store);
        String dir = basePath + File.separator + store;
        try {
            File path = new File(dir);
            if (!path.exists()) path.mkdirs();
            if (!path.exists() || !path.isDirectory() || !path.canWrite())
                throw new PermanentStorageException("Cannot access or write to directory: " + dir);
            log.debug("Opening store directory [{}]", path);
            return FSDirectory.open(path);
        } catch (IOException e) {
            throw new PermanentStorageException("Could not open directory: " + dir, e);
        }
    }

    private IndexWriter getWriter(String store) throws StorageException {
        Preconditions.checkArgument(writerLock.isHeldByCurrentThread());
        IndexWriter writer = writers.get(store);
        if (writer == null) {
            IndexWriterConfig iwc = new IndexWriterConfig(LUCENE_VERSION, analyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            try {
                writer = new IndexWriter(getStoreDirectory(store), iwc);
                writers.put(store, writer);
            } catch (IOException e) {
                throw new PermanentStorageException("Could not create writer", e);
            }
        }
        return writer;
    }

    private SpatialStrategy getSpatialStrategy(String key) {
        SpatialStrategy strategy = spatial.get(key);
        if (strategy == null) {
            synchronized (spatial) {
                if (!spatial.containsKey(key)) {
//                    SpatialPrefixTree grid = new GeohashPrefixTree(ctx, GEO_MAX_LEVELS);
//                    strategy = new RecursivePrefixTreeStrategy(grid, key);
                    strategy = new PointVectorStrategy(ctx, key);
                    spatial.put(key, strategy);
                } else return spatial.get(key);
            }
        }
        return strategy;
    }

    @Override
    public void register(String store, String key, KeyInformation information, TransactionHandle tx) throws StorageException {
        Class<?> dataType = information.getDataType();
        Mapping map = Mapping.getMapping(information);
        Preconditions.checkArgument(map==Mapping.DEFAULT || AttributeUtil.isString(dataType),
                "Specified illegal mapping [%s] for data type [%s]",map,dataType);    }

    @Override
    public void mutate(Map<String, Map<String, IndexMutation>> mutations, KeyInformation.IndexRetriever informations, TransactionHandle tx) throws StorageException {
        Transaction ltx = (Transaction) tx;
        writerLock.lock();
        try {
            for (Map.Entry<String, Map<String, IndexMutation>> stores : mutations.entrySet()) {
                mutateStores(stores, informations);
            }
            ltx.postCommit();
        } catch (IOException e) {
            throw new TemporaryStorageException("Could not update Lucene index", e);
        } finally {
            writerLock.unlock();
        }
    }

    private void mutateStores(Map.Entry<String, Map<String, IndexMutation>> stores, KeyInformation.IndexRetriever informations) throws StorageException, IOException {

        IndexReader reader = null;
        try {
            String storename = stores.getKey();
            IndexWriter writer = getWriter(storename);
            reader = DirectoryReader.open(writer, true);
            IndexSearcher searcher = new IndexSearcher(reader);
            for (Map.Entry<String, IndexMutation> entry : stores.getValue().entrySet()) {
                String docid = entry.getKey();
                IndexMutation mutation = entry.getValue();
                Term docTerm = new Term(DOCID, docid);

                if (mutation.isDeleted()) {
                    log.trace("Deleted entire document [{}]", docid);
                    writer.deleteDocuments(docTerm);
                    continue;
                }

                Document doc = null;
                TopDocs hits = searcher.search(new TermQuery(docTerm), 10);
                Map<String, Shape> geofields = Maps.newHashMap();

                if (hits.scoreDocs.length == 0) {
                    log.trace("Creating new document for [{}]", docid);
                    doc = new Document();
                    Field docidField = new StringField(DOCID, docid, Field.Store.YES);
                    doc.add(docidField);
                } else if (hits.scoreDocs.length > 1) {
                    throw new IllegalArgumentException("More than one document found for document id: " + docid);
                } else {
                    log.trace("Updating existing document for [{}]", docid);
                    int docId = hits.scoreDocs[0].doc;
                    //retrieve the old document
                    doc = searcher.doc(docId);
                    for (IndexableField field : doc.getFields()) {
                        if (field.stringValue().startsWith(GEOID)) {
                            geofields.put(field.name(), ctx.readShape(field.stringValue().substring(GEOID.length())));
                        }
                    }
                }
                Preconditions.checkNotNull(doc);
                for (String key : mutation.getDeletions()) {
                    if (doc.getField(key) != null) {
                        log.trace("Removing field [{}] on document [{}]", key, docid);
                        doc.removeFields(key);
                        geofields.remove(key);
                    }
                }
                for (IndexEntry add : mutation.getAdditions()) {
                    log.trace("Adding field [{}] on document [{}]", add.key, docid);
                    if (doc.getField(add.key) != null) doc.removeFields(add.key);
                    if (add.value instanceof Number) {
                        Field field = null;
                        if (AttributeUtil.isWholeNumber((Number) add.value)) {
                            field = new LongField(add.key, ((Number) add.value).longValue(), Field.Store.YES);
                        } else { //double or float
                            field = new DoubleField(add.key, ((Number) add.value).doubleValue(), Field.Store.YES);
                        }
                        doc.add(field);
                    } else if (AttributeUtil.isString(add.value)) {
                        String str = (String) add.value;
                        Mapping mapping = Mapping.getMapping(storename, add.key, informations);
                        Field field;
                        switch (mapping) {
                            case DEFAULT:
                            case TEXT:
                                field = new TextField(add.key, str, Field.Store.YES);
                                break;
                            case STRING:
                                field = new StringField(add.key, str, Field.Store.YES);
                                break;
                            default:
                                throw new IllegalArgumentException("Illegal mapping specified: " + mapping);
                        }
                        doc.add(field);
                    } else if (add.value instanceof Geoshape) {
                        Shape shape = ((Geoshape) add.value).convert2Spatial4j();
                        geofields.put(add.key, shape);
                        doc.add(new StoredField(add.key, GEOID + ctx.toString(shape)));

                    } else throw new IllegalArgumentException("Unsupported type: " + add.value);
                }
                for (Map.Entry<String, Shape> geo : geofields.entrySet()) {
                    log.trace("Updating geo-indexes for key {}", geo.getKey());
                    for (IndexableField f : getSpatialStrategy(geo.getKey()).createIndexableFields(geo.getValue())) {
                        doc.add(f);
                    }
                }

                //write the old document to the index with the modifications
                writer.updateDocument(new Term(DOCID, docid), doc);
            }
            writer.commit();
        } finally {
            IOUtils.closeQuietly(reader);
        }
    }

    private static final Sort getSortOrder(IndexQuery query) {
        Sort sort = new Sort();
        List<IndexQuery.OrderEntry> orders = query.getOrder();
        if (!orders.isEmpty()) {
            SortField[] fields = new SortField[orders.size()];
            for (int i = 0; i < orders.size(); i++) {
                IndexQuery.OrderEntry order = orders.get(i);
                SortField.Type sortType = null;
                Class datatype = order.getDatatype();
                if (AttributeUtil.isString(datatype)) sortType = SortField.Type.STRING;
                else if (AttributeUtil.isWholeNumber(datatype)) sortType = SortField.Type.LONG;
                else if (AttributeUtil.isDecimal(datatype)) sortType = SortField.Type.DOUBLE;
                else
                    Preconditions.checkArgument(false, "Unsupported order specified on field [%s] with datatype [%s]", order.getKey(), datatype);

                fields[i] = new SortField(order.getKey(), sortType, order.getOrder() == Order.DESC);
            }
            sort.setSort(fields);
        }
        return sort;
    }

    @Override
    public List<String> query(IndexQuery query, KeyInformation.IndexRetriever informations, TransactionHandle tx) throws StorageException {
        //Construct query
        Filter q = convertQuery(query.getCondition(),informations.get(query.getStore()));

        try {
            IndexSearcher searcher = ((Transaction) tx).getSearcher(query.getStore());
            if (searcher == null) return ImmutableList.of(); //Index does not yet exist
            long time = System.currentTimeMillis();
            TopDocs docs = searcher.search(new MatchAllDocsQuery(), q, query.hasLimit() ? query.getLimit() : Integer.MAX_VALUE - 1, getSortOrder(query));
            log.debug("Executed query [{}] in {} ms", q, System.currentTimeMillis() - time);
            List<String> result = new ArrayList<String>(docs.scoreDocs.length);
            for (int i = 0; i < docs.scoreDocs.length; i++) {
                result.add(searcher.doc(docs.scoreDocs[i].doc).getField(DOCID).stringValue());
            }
            return result;
        } catch (IOException e) {
            throw new TemporaryStorageException("Could not execute Lucene query", e);
        }
    }

    private static final Filter numericFilter(String key, Cmp relation, Number value) {
        switch (relation) {
            case EQUAL:
                return (value instanceof Long || value instanceof Integer) ?
                        NumericRangeFilter.newLongRange(key, value.longValue(), value.longValue(), true, true) :
                        NumericRangeFilter.newDoubleRange(key, value.doubleValue(), value.doubleValue(), true, true);
            case NOT_EQUAL:
                BooleanFilter q = new BooleanFilter();
                if (value instanceof Long || value instanceof Integer) {
                    q.add(NumericRangeFilter.newLongRange(key, Long.MIN_VALUE, value.longValue(), true, false), BooleanClause.Occur.SHOULD);
                    q.add(NumericRangeFilter.newLongRange(key, value.longValue(), Long.MAX_VALUE, false, true), BooleanClause.Occur.SHOULD);
                } else {
                    q.add(NumericRangeFilter.newDoubleRange(key, Double.MIN_VALUE, value.doubleValue(), true, false), BooleanClause.Occur.SHOULD);
                    q.add(NumericRangeFilter.newDoubleRange(key, value.doubleValue(), Double.MAX_VALUE, false, true), BooleanClause.Occur.SHOULD);
                }
                return q;
            case LESS_THAN:
                return (value instanceof Long || value instanceof Integer) ?
                        NumericRangeFilter.newLongRange(key, Long.MIN_VALUE, value.longValue(), true, false) :
                        NumericRangeFilter.newDoubleRange(key, Double.MIN_VALUE, value.doubleValue(), true, false);
            case LESS_THAN_EQUAL:
                return (value instanceof Long || value instanceof Integer) ?
                        NumericRangeFilter.newLongRange(key, Long.MIN_VALUE, value.longValue(), true, true) :
                        NumericRangeFilter.newDoubleRange(key, Double.MIN_VALUE, value.doubleValue(), true, true);
            case GREATER_THAN:
                return (value instanceof Long || value instanceof Integer) ?
                        NumericRangeFilter.newLongRange(key, value.longValue(), Long.MAX_VALUE, false, true) :
                        NumericRangeFilter.newDoubleRange(key, value.doubleValue(), Double.MAX_VALUE, false, true);
            case GREATER_THAN_EQUAL:
                return (value instanceof Long || value instanceof Integer) ?
                        NumericRangeFilter.newLongRange(key, value.longValue(), Long.MAX_VALUE, true, true) :
                        NumericRangeFilter.newDoubleRange(key, value.doubleValue(), Double.MAX_VALUE, true, true);
            default:
                throw new IllegalArgumentException("Unexpected relation: " + relation);
        }
    }

    private final Filter convertQuery(Condition<?> condition, KeyInformation.StoreRetriever informations) {
        if (condition instanceof PredicateCondition) {
            PredicateCondition<String, ?> atom = (PredicateCondition) condition;
            Object value = atom.getValue();
            String key = atom.getKey();
            TitanPredicate titanPredicate = atom.getPredicate();
            if (value instanceof Number) {
                Preconditions.checkArgument(titanPredicate instanceof Cmp, "Relation not supported on numeric types: " + titanPredicate);
                Preconditions.checkArgument(value instanceof Number);
                return numericFilter(key, (Cmp) titanPredicate, (Number) value);
            } else if (value instanceof String) {
                Mapping map = Mapping.getMapping(informations.get(key));
                if ((map==Mapping.DEFAULT || map==Mapping.TEXT) && !titanPredicate.toString().startsWith("CONTAINS"))
                    throw new IllegalArgumentException("Text mapped string values only support CONTAINS queries and not: " + titanPredicate);
                if (map==Mapping.STRING && titanPredicate.toString().startsWith("CONTAINS"))
                    throw new IllegalArgumentException("String mapped string values do not support CONTAINS queries: " + titanPredicate);

                if (titanPredicate == Text.CONTAINS) {
                    value = ((String) value).toLowerCase();
                    return new TermsFilter(new Term(key, (String) value));
                } else if (titanPredicate == Text.CONTAINS_PREFIX) {
                    value = ((String) value).toLowerCase();
                    return new PrefixFilter(new Term(key, (String) value));
                } else if (titanPredicate == Text.PREFIX) {
                    return new PrefixFilter(new Term(key, (String) value));
//                } else if (titanPredicate == Text.CONTAINS_REGEX) {
//                    value = ((String) value).toLowerCase();
//                    return new RegexpQuery(new Term(key,(String)value));
                } else if (titanPredicate == Cmp.EQUAL) {
                    return new TermsFilter(new Term(key,(String)value));
                } else if (titanPredicate == Cmp.NOT_EQUAL) {
                    BooleanFilter q = new BooleanFilter();
                    q.add(new TermsFilter(new Term(key,(String)value)), BooleanClause.Occur.MUST_NOT);
                    return q;
                } else
                    throw new IllegalArgumentException("Relation is not supported for string value: " + titanPredicate);
            } else if (value instanceof Geoshape) {
                Preconditions.checkArgument(titanPredicate == Geo.WITHIN, "Relation is not supported for geo value: " + titanPredicate);
                Shape shape = ((Geoshape) value).convert2Spatial4j();
                SpatialArgs args = new SpatialArgs(SpatialOperation.IsWithin, shape);
                return getSpatialStrategy(key).makeFilter(args);
            } else throw new IllegalArgumentException("Unsupported type: " + value);
        } else if (condition instanceof Not) {
            BooleanFilter q = new BooleanFilter();
            q.add(convertQuery(((Not) condition).getChild(),informations), BooleanClause.Occur.MUST_NOT);
            return q;
        } else if (condition instanceof And) {
            BooleanFilter q = new BooleanFilter();
            for (Condition c : condition.getChildren()) {
                q.add(convertQuery(c,informations), BooleanClause.Occur.MUST);
            }
            return q;
        } else if (condition instanceof Or) {
            BooleanFilter q = new BooleanFilter();
            for (Condition c : condition.getChildren()) {
                q.add(convertQuery(c,informations), BooleanClause.Occur.SHOULD);
            }
            return q;
        } else throw new IllegalArgumentException("Invalid condition: " + condition);
    }

    @Override
    public Iterable<RawQuery.Result<String>> query(RawQuery query, KeyInformation.IndexRetriever informations, TransactionHandle tx) throws StorageException {
        Query q;
        try {
            q = new QueryParser(LUCENE_VERSION,"_all",analyzer).parse(query.getQuery());
        } catch (ParseException e) {
            throw new PermanentStorageException("Could not parse raw query: "+query.getQuery(),e);
        }

        try {
            IndexSearcher searcher = ((Transaction) tx).getSearcher(query.getStore());
            if (searcher == null) return ImmutableList.of(); //Index does not yet exist

            long time = System.currentTimeMillis();
            TopDocs docs = searcher.search(q, query.hasLimit() ? query.getLimit() : Integer.MAX_VALUE - 1);
            log.debug("Executed query [{}] in {} ms",q, System.currentTimeMillis() - time);
            List<RawQuery.Result<String>> result = new ArrayList<RawQuery.Result<String>>(docs.scoreDocs.length);
            for (int i = 0; i < docs.scoreDocs.length; i++) {
                result.add(new RawQuery.Result<String>(searcher.doc(docs.scoreDocs[i].doc).getField(DOCID).stringValue(),docs.scoreDocs[i].score));
            }
            return result;
        } catch (IOException e) {
            throw new TemporaryStorageException("Could not execute Lucene query", e);
        }
    }

    @Override
    public TransactionHandle beginTransaction() throws StorageException {
        return new Transaction();
    }

    @Override
    public boolean supports(KeyInformation information, TitanPredicate titanPredicate) {
        Class<?> dataType = information.getDataType();
        Mapping mapping = Mapping.getMapping(information);
        if (mapping!=Mapping.DEFAULT && !AttributeUtil.isString(dataType)) return false;

        if (Number.class.isAssignableFrom(dataType)) {
            if (titanPredicate instanceof Cmp) return true;
        } else if (dataType == Geoshape.class) {
            return titanPredicate == Geo.WITHIN;
        } else if (AttributeUtil.isString(dataType)) {
            switch(mapping) {
                case DEFAULT:
                case TEXT:
                    return titanPredicate == Text.CONTAINS || titanPredicate == Text.CONTAINS_PREFIX; // || titanPredicate == Text.CONTAINS_REGEX;
                case STRING:
                    return titanPredicate == Cmp.EQUAL || titanPredicate==Cmp.NOT_EQUAL || titanPredicate==Text.PREFIX; //  || titanPredicate==Text.REGEX
            }
        }
        return false;
    }

    @Override
    public boolean supports(KeyInformation information) {
        Class<?> dataType = information.getDataType();
        Mapping mapping = Mapping.getMapping(information);
        if (Number.class.isAssignableFrom(dataType) || dataType == Geoshape.class) {
            if (mapping==Mapping.DEFAULT) return true;
        } else if (AttributeUtil.isString(dataType)) {
            if (mapping==Mapping.DEFAULT || mapping==Mapping.STRING || mapping==Mapping.TEXT) return true;
        }
        return false;
    }

    @Override
    public void close() throws StorageException {
        try {
            for (IndexWriter w : writers.values()) w.close();
        } catch (IOException e) {
            throw new PermanentStorageException("Could not close writers", e);
        }
    }

    @Override
    public void clearStorage() throws StorageException {
        try {
            FileUtils.deleteDirectory(new File(basePath));
        } catch (IOException e) {
            throw new PermanentStorageException("Could not delete lucene directory: " + basePath, e);
        }
    }

    private class Transaction implements TransactionHandle {

        private final Set<String> updatedStores = Sets.newHashSet();


        private final Map<String, IndexSearcher> searchers = new HashMap<String, IndexSearcher>(4);


        private synchronized IndexSearcher getSearcher(String store) throws StorageException {
            IndexSearcher searcher = searchers.get(store);
            if (searcher == null) {
                IndexReader reader = null;
                try {
                    reader = DirectoryReader.open(getStoreDirectory(store));
                    searcher = new IndexSearcher(reader);
                } catch (IndexNotFoundException e) {
                    searcher = null;
                } catch (IOException e) {
                    throw new PermanentStorageException("Could not open index reader on store: " + store, e);
                }
                searchers.put(store, searcher);
            }
            return searcher;
        }

        public void postCommit() throws StorageException {
            close();
            searchers.clear();
        }


        @Override
        public void commit() throws StorageException {
            close();
        }

        @Override
        public void rollback() throws StorageException {
            close();
        }

        @Override
        public void flush() throws StorageException {

        }

        private void close() throws StorageException {
            try {
                for (IndexSearcher searcher : searchers.values()) {
                    if (searcher != null) searcher.getIndexReader().close();
                }
            } catch (IOException e) {
                throw new PermanentStorageException("Could not close searcher", e);
            }
        }
    }

}
TOP

Related Classes of com.thinkaurelius.titan.diskstorage.lucene.LuceneIndex$Transaction

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.