Package com.ikanow.infinit.e.api.knowledge.aliases

Examples of com.ikanow.infinit.e.api.knowledge.aliases.AliasLookupTable


      searchOptions.addFields(EntityFeaturePojo.linkdata_);     
    }

    // Initial alias handling:

    AliasLookupTable aliasTable = null;
    HashMap<String, SearchSuggestPojo> aliasResults = null;
    if (!bWantNoAlias) {
      AliasManager aliasManager = AliasManager.getAliasManager();
      if (null != aliasManager) {
        aliasTable = aliasManager.getAliasLookupTable(communityIdStrList, communityIdStrs, null, userIdStr);
      }
    }
    //TESTED

    // Also create an internal Lucene index for aliases, in case any of them do not have actual entities representing them
    List<EntityFeaturePojo> extraEntries = null;
    if (null != aliasTable) {
      extraEntries = checkAliasMasters(aliasTable, escapedterm);
    }
    // (end initial alias handling)

    int nDesiredSize = 20;
    if (null == aliasTable) {   
      searchOptions.setSize(nDesiredSize); // will forward all 20
    }
    else {
      searchOptions.addFields(EntityFeaturePojo.index_);
      searchOptions.setSize(3*nDesiredSize); // will forward top 20 after de-aliasing

      aliasResults = new HashMap<String, SearchSuggestPojo>();
      // (We use this to ensure we only include each entity once after aliasing)
    }
    //TESTED

    // Perform the search

    SearchResponse rsp = gazIndex.doQuery(queryObj, searchOptions);

    // Format the return values

    SearchHit[] docs = rsp.getHits().getHits();     
    DimensionListPojo dimlist = new DimensionListPojo();
    int nDocsAdded = 0;
   
    if (null != extraEntries) { // Put the alias masters at the top:
      //DEBUG
      //System.out.println(Arrays.toString(extraEntries.toArray()));
      for (EntityFeaturePojo alias: extraEntries) {
        SearchSuggestPojo sp = new SearchSuggestPojo();
        if (null != alias.getDimension()) {
          sp.setDimension(alias.getDimension().toString());
        }
        else {
          sp.setDimension("What");
        }
        sp.setValue(alias.getDisambiguatedName());
        sp.setType(alias.getType());
        if (bIncludeGeo) {
          sp.setGeotag(alias.getGeotag());
        }
        sp.setOntology_type(alias.getOntology_type());
        dimlist.addSearchSuggestPojo(sp);
      }
    }//TESTED (inc geo)
   
    if (null != docs)
    {
      for (SearchHit hit: docs)
      {
        SearchHitField shf = hit.field(EntityFeaturePojo.disambiguated_name_);
        if (null == shf) { // robustness check, sometimes if the harvester goes wrong this field might be missing
          continue;
        }
        String disname = (String) shf.value();
        String type = (String) hit.field(EntityFeaturePojo.type_).value();
        String dimension = (String) hit.field(EntityFeaturePojo.dimension_).value();
        SearchSuggestPojo sp = new SearchSuggestPojo();       

        sp.setValue(disname);
        sp.setDimension(dimension);
        sp.setType(type);
        if (bIncludeGeo)
        {
          SearchHitField loc = hit.field(EntityFeaturePojo.geotag_);
          if ( loc != null )
            sp.setLocFromES((String) loc.value());
          SearchHitField ont = hit.field(EntityFeaturePojo.ontology_type_);
          if ( ont != null )
            sp.setOntology_type((String)ont.value());
        }
        if (bIncludeLinkdata) {
          SearchHitField linkdata = hit.field(EntityFeaturePojo.linkdata_);
          if ( linkdata != null )
            sp.setLinkdata(linkdata.values());
        }               

        // More alias handling
        String index = null;
        if (null != aliasTable) {
          index = (String) hit.field(EntityFeaturePojo.index_).value();
          EntityFeaturePojo alias = aliasTable.getAliasMaster(index);
          if (null != alias) { // Found!
            if (alias.getIndex().equalsIgnoreCase("discard")) { // Discard this entity
              continue;
            }
            else if ((null != alias.getDisambiguatedName()) && (null != alias.getType())) {
View Full Code Here


    {
      // Community ids, needed in a couple of places
      String[] communityIdStrs = SocialUtils.getCommunityIds(userIdStr, communityIdStrList);

      // Initial alias handling:
      AliasLookupTable aliasTable = null;
      // Initial alias handling:     
      if (!bWantNoAlias) {
        AliasManager aliasManager = AliasManager.getAliasManager();
        if (null != aliasManager) {
          aliasTable = aliasManager.getAliasLookupTable(communityIdStrList, communityIdStrs, null, userIdStr);
        }
      }//TESTED                   

      ElasticSearchManager esm = ElasticSearchManager.getIndex(assocIndex_);
      SearchRequestBuilder searchOptions = esm.getSearchOptions();
      BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
      boolean bExtraQueryTerms = false;
      String term = "";
      if ( !ent1.equals("null") )
      {
        if ( field.equals(AssociationFeaturePojo.entity1_) )
          term = ent1;
        else {
          bExtraQueryTerms = true;
          EntityFeaturePojo alias = null;
          if (null != aliasTable) {
            alias = aliasTable.getAliasMaster(ent1);
          }
          if (null != alias) { // Found!
            boolQuery.must(QueryBuilders.termsQuery(AssociationFeaturePojo.entity1_index_, alias.getAlias().toArray()));
          }
          else {
            boolQuery.must(QueryBuilders.termQuery(AssociationFeaturePojo.entity1_index_, ent1));
          }//TESTED
        }
      }
      if ( !verb.equals("null") )
      {
        if ( field.equals(AssociationFeaturePojo.verb_) )
          term = verb;
        else
        {
          bExtraQueryTerms = true;
          boolQuery.must(QueryBuilders.queryString(new StringBuffer("+").append(verb.replaceAll("\\s+", " +")).toString()).
              defaultField(AssociationFeaturePojo.verb_));
        }
      }
      if ( !ent2.equals("null") )
      {
        if ( field.equals(AssociationFeaturePojo.entity2_) )
          term = ent2;
        else {
          bExtraQueryTerms = true;
          EntityFeaturePojo alias = null;
          if (null != aliasTable) {
            alias = aliasTable.getAliasMaster(ent2);
          }
          if (null != alias) { // Found!
            boolQuery.must(QueryBuilders.termsQuery(AssociationFeaturePojo.entity2_index_, alias.getAlias().toArray()));
          }
          else {
            boolQuery.must(QueryBuilders.termQuery(AssociationFeaturePojo.entity2_index_, ent2));
          }
        }//TESTED (cut and paste from entity1)
     

      String escapedterm = null;
      StandardTokenizer st = new StandardTokenizer(Version.LUCENE_30, new StringReader(ContentUtils.stripDiacritics(term)));
      CharTermAttribute termAtt = st.addAttribute(CharTermAttribute.class);
      StringBuffer sb = new StringBuffer();
      try {
        try {
          st.reset();
          while (st.incrementToken()) {
            if (sb.length() > 0) {
              sb.append(" +");
            }
            else {
              sb.append('+');           
            }
            sb.append(luceneEncodeTerm(termAtt.toString()));
          }
        }
        finally {
          st.close();
        }
      } catch (IOException e) {
        e.printStackTrace();
      }     
      if (!term.endsWith(" ") || (0 == sb.length())) { // Could be in the middle of typing, stick a * on the end
        sb.append('*');
      }//TESTED     

      escapedterm = sb.toString();
     
      // Also create an internal Lucene index for aliases, in case any of them do not have actual entities representing them
      List<EntityFeaturePojo> extraEntries = null;
      BoolQueryBuilder extraQueryTerms = null;
      if (field.startsWith("entity")) {
        String indexField = field.startsWith("entity1") ? "entity1_index" : "entity2_index";
        if (null != aliasTable) {
          extraEntries = checkAliasMasters(aliasTable, escapedterm);
        }
        if (null != extraEntries) {
          extraQueryTerms = QueryBuilders.boolQuery();
          int nExtraTerms = 0;
          Iterator<EntityFeaturePojo> aliasIt = extraEntries.iterator();
          while (aliasIt.hasNext()) {
            EntityFeaturePojo alias = aliasIt.next();           
            nExtraTerms += alias.getAlias().size();
           
            if (!bExtraQueryTerms && (nExtraTerms > 20)) { // If not filtering on event type we'll be more aggressive
              break;
            }//TESTED
            if (bExtraQueryTerms && (nExtraTerms > 60)) { // If the number of terms gets too large bail anyway
              break;
            }//TESTED
           
            extraQueryTerms.should(QueryBuilders.termsQuery(indexField, alias.getAlias().toArray()));
            aliasIt.remove();
           
          }//end loop over entities
        }//if found new aliases
       
      }//(if this is an entity lookup) TESTED - including breaking out because of # of terms
     
      // (end initial alias handling)
     
      if (null == extraQueryTerms) {
        boolQuery.must(QueryBuilders.queryString(escapedterm).defaultField(field));
      }
      else {//(in this case combine the escaped term with the aliases
        extraQueryTerms.should(QueryBuilders.queryString(escapedterm).defaultField(field));
        boolQuery.must(extraQueryTerms);
      }//TESTED
      boolQuery.must(QueryBuilders.termsQuery(AssociationFeaturePojo.communityId_, communityIdStrs));

      searchOptions.addSort(AssociationFeaturePojo.doccount_, SortOrder.DESC);

      // Work out which fields to return:
      //TODO (INF-1234) need to work out what to do with quotations and similar here (ie entityX without entityX_index)
      String returnfield;
      boolean bReturningEntities = true;
      if ( field.equals(AssociationFeaturePojo.entity1_) ) {
        returnfield = AssociationFeaturePojo.entity1_index_;
        searchOptions.addFields( AssociationFeaturePojo.entity1_index_, AssociationFeaturePojo.doccount_);
      }
      else if ( field.equals(AssociationFeaturePojo.entity2_)) {
        returnfield = AssociationFeaturePojo.entity2_index_;
        searchOptions.addFields( AssociationFeaturePojo.entity2_index_, AssociationFeaturePojo.doccount_);
      }
      else {
        bReturningEntities = false;
        returnfield = AssociationFeaturePojo.verb_;
        searchOptions.addFields( AssociationFeaturePojo.verb_, AssociationFeaturePojo.verb_category_,  AssociationFeaturePojo.doccount_);
      }

      int nNumSuggestionsToReturn = 20;
      if (bReturningEntities && (null != aliasTable)) {
        searchOptions.setSize(3*nNumSuggestionsToReturn); // we're going to remove some duplicates so get more than we need
      }
      else { // normal case
        searchOptions.setSize(nNumSuggestionsToReturn);
      }

      SearchResponse rsp = esm.doQuery(boolQuery, searchOptions);
      SearchHit[] docs = rsp.getHits().getHits();

      // Currently this code takes the results and puts them into a set so
      // that there are no duplicates. Duplicates occur, for example, when you
      // search for "obama" and get both obama/quotation/quote1 and
      // obama/travel/spain. We may want to handle this differently, or at
      // least sum up the frequencies.
      Set<String> suggestions = new HashSet<String>();

      for (SearchHit hit: docs)
      {
        SearchHitField retField = hit.field(returnfield); // (this can be null in theory/by mistake)
        if (null != retField) {
          String suggestion = (String) retField.value();
          if (bReturningEntities && (null != aliasTable))
          {
            // More alias handling
            EntityFeaturePojo alias = aliasTable.getAliasMaster(suggestion);
            if (null != alias) { // Found!
              if (alias.getIndex().equalsIgnoreCase("discard")) { // Discard this entity
                continue;
              }
              else {
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.api.knowledge.aliases.AliasLookupTable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.