Package com.flaptor.indextank.index.rti.inverted

Source Code of com.flaptor.indextank.index.rti.inverted.InvertedIndex

/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package com.flaptor.indextank.index.rti.inverted;

import java.util.BitSet;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;

import com.flaptor.indextank.Indexer;
import com.flaptor.indextank.index.DocId;
import com.flaptor.indextank.index.Document;
import com.flaptor.indextank.index.QueryMatcher;
import com.flaptor.indextank.index.ScoredMatch;
import com.flaptor.indextank.index.TopMatches;
import com.flaptor.indextank.index.scorer.FacetingManager;
import com.flaptor.indextank.index.scorer.Scorer;
import com.flaptor.indextank.index.term.DocTermMatch;
import com.flaptor.indextank.index.term.TermMatcher;
import com.flaptor.indextank.index.term.query.RawMatch;
import com.flaptor.indextank.index.term.query.TermBasedQueryMatcher;
import com.flaptor.indextank.query.AToken;
import com.flaptor.indextank.query.IndexEngineParser;
import com.flaptor.indextank.query.Query;
import com.flaptor.indextank.util.AbstractSkippableIterable;
import com.flaptor.indextank.util.AbstractSkippableIterator;
import com.flaptor.indextank.util.SkippableIterable;
import com.flaptor.indextank.util.SkippableIterator;
import com.flaptor.indextank.util.Skippables;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.MapMaker;
import com.google.common.collect.Maps;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;


public class InvertedIndex implements Indexer, QueryMatcher, TermMatcher {

  private final int maxDocCount;
  private final DocId[] docids;
  private final BitSet internalDeletes;
  private final AtomicInteger docCount;
 
  private final ConcurrentMap<DocId, Integer> docidsIndexes = new MapMaker().makeMap();
  private final ConcurrentNavigableMap<Key, DocTermMatchList> invertedIndex = new ConcurrentSkipListMap<Key, DocTermMatchList>();
  private final ConcurrentMap<DocId, DocId> deletes = new MapMaker().makeMap();
  //private final ConcurrentHashMap<DocId, DocId> deletes = Maps.newConcurrentHashMap();
  private final QueryMatcher matcher;
  private final IndexEngineParser parser;

  public InvertedIndex(Scorer scorer, IndexEngineParser parser, int maxDocCount, FacetingManager facetingManager) {
        Preconditions.checkArgument(maxDocCount > 0);
    this.maxDocCount = maxDocCount;
    this.docids = new DocId[maxDocCount];
    this.internalDeletes = new BitSet(maxDocCount);
    this.docCount = new AtomicInteger(0);
    this.matcher = new TermBasedQueryMatcher(scorer, this, facetingManager);
        this.parser = parser;
  }
 
  public void add(String sdocid, final Document document) {
    int idx = docCount.getAndIncrement();
    if (idx < maxDocCount) {
        DocId docid = new DocId(sdocid);
      docids[idx] = docid;
      Integer oldIdx = docidsIndexes.put(docid, idx);
      if (oldIdx != null) {
        internalDel(oldIdx);
      }
      internalAdd(idx, document);
    } else {
      throw new IllegalStateException("MaxDocCount (" + maxDocCount + ") reached. Cannot add more documents.");
    }
  }

  public void del(String sdocid) {
      DocId docid = new DocId(sdocid);
      Integer idx = docidsIndexes.get(docid);
      if (idx != null) {
        internalDel(idx);
      } else {
        deletes.put(docid, docid);
      }
    }

  private void internalAdd(int idx, final Document document) {
    for (String field : document.getFieldNames()) {
      Iterator<AToken> tokens = parser.parseDocumentField(field, document.getField(field));
      SortedSetMultimap<String, Integer> termPositions = TreeMultimap.create();
      int tokenCount = 0;
      while (tokens.hasNext()) {
        tokenCount++;
        AToken token = tokens.next();
        termPositions.put(token.getText(), token.getPosition());
      }
       
      for (String term : termPositions.keySet()) {
        Key key = new Key(field, term);
        SortedSet<Integer> positionsSet = termPositions.get(term);
                int[] positions = new int[positionsSet.size()];
                int p = 0;
                for (Integer i : positionsSet) {
                    positions[p++] = i;
                }
        DocTermMatchList original = invertedIndex.putIfAbsent(key, new DocTermMatchList(idx, positions, tokenCount));
        if (original != null) {
          original.add(idx, positions, tokenCount);
        }
      }
    }
  }

  private void internalDel(int idx) {
    internalDeletes.set(idx);
  }
 
  public SkippableIterable<DocTermMatch> getMatches(String field, String term) {
    DocTermMatchList docList = invertedIndex.get(new Key(field, term));
    if (docList == null) {
      return Skippables.emptyIterable();
    } else {
      return Skippables.filter(docList, notDeletedPredicate());
    }
  }

  @Override
  public NavigableMap<String, SkippableIterable<DocTermMatch>> getMatches(String field, String termFrom, String termTo) {
      Key leftBoundary = new Key(field, termFrom);
      Key rightBoundary = new Key(field, termTo);
     
      ConcurrentNavigableMap<Key, DocTermMatchList> range = invertedIndex.subMap(leftBoundary, rightBoundary);
     
      NavigableMap<String, SkippableIterable<DocTermMatch>> result = new TreeMap<String, SkippableIterable<DocTermMatch>>();
     
      int numberOfTerms = 0;
      for (Entry<Key, DocTermMatchList> entry : range.entrySet()) {
            result.put(entry.getKey().term, Skippables.filter(entry.getValue(), notDeletedPredicate()));
            numberOfTerms++;
            if (numberOfTerms >= 1000) {
                break;
            }
        }
     
      return result;
  }
 
 
  private Predicate<DocTermMatch> notDeletedPredicate() {
    return new Predicate<DocTermMatch>() {
      @Override
      public boolean apply(DocTermMatch item) {
        return !internalDeletes.get(item.getRawId());
      }
    };
  }

  public boolean hasChanges(DocId docid) {
      return docidsIndexes.containsKey(docid) || deletes.containsKey(docid);
  }

  @Override
  public Iterable<ScoredMatch> decode(Iterable<RawMatch> rawMatches, final double boostedNorm) {
    return Iterables.transform(rawMatches, new Function<RawMatch, ScoredMatch>() {
            @Override
            public ScoredMatch apply(RawMatch rawMatch) {
                //System.out.println("RESULT: "+rawMatch.getNormalizedScore());
                return new ScoredMatch(rawMatch.getBoostedScore() / boostedNorm, docids[rawMatch.getRawId()]);
            }
        });
  }
 
  /* QueryMatcher interface - delegates in internal matcher instance */

  public TopMatches findMatches(Query query, int limit, int scoringFunctionIndex) throws InterruptedException {
    return matcher.findMatches(query, limit, scoringFunctionIndex);
  }

  public TopMatches findMatches(Query query, Predicate<DocId> idFilter,
      int limit, int scoringFunctionIndex) throws InterruptedException {
    return matcher.findMatches(query, idFilter, limit, scoringFunctionIndex);
  }

    @Override
    public SkippableIterable<Integer> getAllDocs() {
        return new AbstractSkippableIterable<Integer>() {
            @Override
            public SkippableIterator<Integer> iterator() {
                return new AbstractSkippableIterator<Integer>() {
                    int current = -1;
                    @Override
                    public void skipTo(int i) {
                        current = i-1;
                    }
                   
                    @Override
                    protected Integer computeNext() {
                        while (++current < docCount.get()) {
                            if (!internalDeletes.get(current)) {
                                return current;
                            }
                        }
                        return endOfData();
                    }
                };
            }
        };
    }

    @Override
    public int countMatches(Query query) throws InterruptedException {
        return matcher.countMatches(query);
    }

    @Override
    public int countMatches(Query query, Predicate<DocId> idFilter) throws InterruptedException {
        return matcher.countMatches(query, idFilter);
    }

    public Map<String, String> getStats(String prefix) {
        Map<String, String> stats = Maps.newHashMap();
        stats.put(prefix + "size", String.valueOf(docCount.get()));
        stats.put(prefix + "terms", String.valueOf(invertedIndex.size()));
        stats.put(prefix + "deletes", String.valueOf(deletes.size()));
        stats.put(prefix + "internal_deletes", String.valueOf(internalDeletes.cardinality()));
        return stats;
    }
 
}
TOP

Related Classes of com.flaptor.indextank.index.rti.inverted.InvertedIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.