// Package er.neo4jadaptor.query.lucene

// Source Code of er.neo4jadaptor.query.lucene.LuceneOptimizer

package er.neo4jadaptor.query.lucene;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.PropertyContainer;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.index.Index;
import org.neo4j.graphdb.index.IndexHits;

import com.webobjects.eoaccess.EOAttribute;
import com.webobjects.eoaccess.EOEntity;
import com.webobjects.eoaccess.EORelationship;
import com.webobjects.eocontrol.EOAndQualifier;
import com.webobjects.eocontrol.EOKeyValueQualifier;
import com.webobjects.eocontrol.EOOrQualifier;
import com.webobjects.eocontrol.EOQualifier;
import com.webobjects.foundation.NSArray;

import er.neo4jadaptor.ersatz.lucene.LuceneTranslator;
import er.neo4jadaptor.ersatz.neo4j.Neo4JTranslator;
import er.neo4jadaptor.query.Results;
import er.neo4jadaptor.query.lucene.results.LuceneIndexHits;


/**
* <p>
* Consider EOQualifier:
* <pre>
* (((service.dvbOriginalNetworkId = 1536) and (service.dvbTransportStreamId = 2069) and (service.dvbServiceId = 19322)) and (endDateTime >= (com.webobjects.foundation.NSTimestamp)'2012-06-07 04:00:00 Etc/GMT'))
* </pre>
*
* LuceneQueryConverter doesn't support relationships so it would convert it to:
* <pre>
* +(+(+#_type:WEPGEvent +#_type:WEPGEvent +#_type:WEPGEvent) +endDateTime:[2012060706:00:00:000 TO ZZZZ]) +#_type:WEPGEvent
* </pre>
*
* which would return more results than actually matching the original query. We could optimize lucene query by first searching for services
* matching
* <pre>
* dvbOriginalNetworkId = 1536 and dvbTransportStreamId = 2069 and dvbServiceId = 19322
* </pre>
* and then replace
* ((service.dvbOriginalNetworkId = 1536) and (service.dvbTransportStreamId = 2069) and (service.dvbServiceId = 19322))
* part with matching service IDs, so we could get in result something like:
* +(+(+(+#_type:WEPGEvent +#_type:WEPGEvent +#_type:WEPGEvent) +endDateTime:[2012060706:00:00:000 TO ZZZZ]) +#_type:WEPGEvent) +(serviceId:00000000000000002026)
* instead.
* </p>
*
* <p>
* Due to the fact that optimization process makes another Lucene query which has a around-constant overhead, we perform optimization
* attempt only if the initial number of results exceeds treshold of {@value #OPTIMIZATION_TRESHOLD}.
* </p>
*
* TODO: refactor
*
* @author Jedrzej Sobanski
*
* @param <Type>
*/
public class LuceneOptimizer <Type extends PropertyContainer> {
  private static final org.apache.log4j.Logger log = org.apache.log4j.Logger.getLogger(LuceneOptimizer.class);
 
  /**
   * Perform optimization attempt only if there are more then this many results.
   */
  public static final int OPTIMIZATION_TRESHOLD = 1000;
 
  private final Index<Type> index;
 
  public LuceneOptimizer(Index<Type> index) {
    this.index = index;
  }
 
  public static boolean canBeOptimized(IndexHits<? extends PropertyContainer> hits, EOQualifier qualifier) {
    return hits.size() > OPTIMIZATION_TRESHOLD && false == containsAlternatives(qualifier);
  }
 
  private static <Type extends PropertyContainer> long getObjectId(Type t) {
    if (t instanceof Node) {
      return ((Node) t).getId();
    } else if (t instanceof Relationship) {
      return ((Relationship) t).getId();
    } else {
      throw new IllegalArgumentException("Doesn't know type of object " + t);
    }
  }
 
  public Results<Type> optimize(Query q, EOEntity entity, EOQualifier qualifier) {
    Map<EORelationship, List<Type>> relToNodes = relationshipsToNodes(entity, qualifier);
    BooleanQuery boolQuery = new BooleanQuery();
    IndexHits<Type> hits;
   
    // TODO: we make implicit assumption here that q is only conjunction
    boolQuery.add(q, Occur.MUST);
   
    for (EORelationship r : relToNodes.keySet()) {
      NSArray<EOAttribute> srcAtts = r.sourceAttributes();
      BooleanQuery relationshipQuery = new BooleanQuery();
      EOAttribute att;
     
      if (srcAtts.count() != 1) {
        throw new IllegalArgumentException();
      } else {
        att = srcAtts.get(0);
      }

      for (Type n : relToNodes.get(r)) {
        Object ultimateId = Neo4JTranslator.instance.toNeutralValue(getObjectId(n), att);
        String luceneValue = LuceneTranslator.instance.fromNeutralValue(ultimateId, att);
        Term term = new Term(att.name(), luceneValue);
        TermQuery termQuery = new TermQuery(term);
       
        if (relationshipQuery.clauses().size() >= BooleanQuery.getMaxClauseCount()) {
          BooleanQuery.setMaxClauseCount(BooleanQuery.getMaxClauseCount() + 10);
        }
        relationshipQuery.add(termQuery, Occur.SHOULD);
      }
      boolQuery.add(relationshipQuery, Occur.MUST);
    }
    hits = index.query(boolQuery);

    if (log.isDebugEnabled()) {
      log.debug("Querying lucene with " + q);
    }
   
    return new LuceneIndexHits<Type>(hits);
  }
 
  private Map<EORelationship, List<Type>> relationshipsToNodes(EOEntity entity, EOQualifier qualifier) {
    Map<EORelationship, List<EOKeyValueQualifier>> relToQualifiers = new HashMap<EORelationship, List<EOKeyValueQualifier>>();
    Map<EORelationship, List<Type>> relToNodes = new HashMap<EORelationship, List<Type>>();
    LuceneQueryConverter luceneConverter = new LuceneQueryConverter();
   
    collectUsedRelationships(relToQualifiers, entity, qualifier);
   
    for (EORelationship r : relToQualifiers.keySet()) {
      NSArray<EOQualifier> qualifiers = new NSArray<EOQualifier>(relToQualifiers.get(r));
      Query luceneQuery;
      List<Type> nodes = new ArrayList<Type>();
     
      luceneQuery = luceneConverter.fullQuery(r.destinationEntity(), new EOAndQualifier(qualifiers));
      for (Type node : index.query(luceneQuery)) {
        nodes.add(node);
      }
      relToNodes.put(r, nodes);
    }
    return relToNodes;
  }

  private static boolean containsAlternatives(EOQualifier q) {
    if (q instanceof EOOrQualifier) {
      return true;
    }
    if (q instanceof EOAndQualifier) {
      for (EOQualifier q1 : ((EOAndQualifier) q).qualifiers()) {
        if (containsAlternatives(q1)) {
          return true;
        }
      }
    }
    return false;
  }
 
  /**
   * Qualifier MUST NOT contain any alternatives
   * @param q
   * @return
   *
   * TODO: it doesn't look good - supports only AND and key-value qualifiers
   */
  private void collectUsedRelationships(Map<EORelationship, List<EOKeyValueQualifier>> result, EOEntity e, EOQualifier q) {
    if (q instanceof EOKeyValueQualifier) {
      EOKeyValueQualifier kvq = (EOKeyValueQualifier) q;
      String key = kvq.key();
      String [] segments = key.split("\\.");
     
      if (segments.length == 2) {
        EORelationship r = e.relationshipNamed(segments[0]);
       
        if (r != null && false == r.isToMany() && false == r.isFlattened()) {
          List<EOKeyValueQualifier> list = result.get(r);
         
          if (list == null) {
            list = new ArrayList<EOKeyValueQualifier>();
            result.put(r, list);
          }
          list.add(new EOKeyValueQualifier(segments[1], kvq.selector(), kvq.value()));
        }
      } else {
        // ignore, too complex
      }
    } else if (q instanceof EOAndQualifier) {
      for (EOQualifier q1 : ((EOAndQualifier) q).qualifiers()) {
        collectUsedRelationships(result, e, q1);
      }
    } else {
      throw new IllegalArgumentException("Qualifiers different than " + EOAndQualifier.class.getSimpleName() + " or " + EOKeyValueQualifier.class.getSimpleName() + " are not supported");
    }
  }
}
// TOP

// Related Classes of er.neo4jadaptor.query.lucene.LuceneOptimizer

// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.