Examples of AssociationPojo


Examples of com.ikanow.infinit.e.data_model.store.document.AssociationPojo

        }
      }//TESTED
      if (null != doc.getAssociations()) {
        Iterator<AssociationPojo> evtIt = doc.getAssociations().iterator();
        while (evtIt.hasNext())  { 
          AssociationPojo evt = evtIt.next();

          if (null != deletedEntities) { // check we're not using these entities in our associations
            if (null != evt.getEntity1_index() && deletedEntities.contains(evt.getEntity1_index())) {
              evtIt.remove();
              continue;
            }//TESTED (cut and paste from tested code below)
            if (null != evt.getEntity2_index() && deletedEntities.contains(evt.getEntity2_index())) {
              evtIt.remove();
              continue;
            }//TESTED
            if (null != evt.getGeo_index() && deletedEntities.contains(evt.getGeo_index())) {
              evt.setGeo_index(null);
            }//TESTED (trivial)
           
          }//TESTED
         
          boolean bAlreadyCountedFreq = false;         
          if ((null == evt.getEntity1_index()) && (null == evt.getEntity2_index())) {//skip this event if there is no ent1/en2
            continue;
          }
          // Calc index (this is not remotely unique, of course, but good enough for now...):
          String sEventFeatureIndex = AssociationAggregationUtils.getEventFeatureIndex(evt);
          evt.setIndex(sEventFeatureIndex); //(temp save for applyAggregationToDocs below)
         
          // Use index:
          Map<ObjectId, AssociationFeaturePojo> eventInCommunity = _aggregatedEvents.get(sEventFeatureIndex);
          if (null == eventInCommunity) {
            eventInCommunity = new HashMap<ObjectId, AssociationFeaturePojo>();
            _aggregatedEvents.put(sEventFeatureIndex, eventInCommunity);
            intraDocStore.add(sEventFeatureIndex);
          }
          else if (intraDocStore.contains(sEventFeatureIndex)) {
            bAlreadyCountedFreq = true;
          }
          else {
            intraDocStore.add(sEventFeatureIndex);
          }
          ObjectId communityId = doc.getCommunityId();
          if (null != communityId) {
            AssociationFeaturePojo feature = eventInCommunity.get(communityId);
            if (null == feature) {
              feature = new AssociationFeaturePojo();
              feature.setCommunityId(communityId);
              feature.setIndex(sEventFeatureIndex);
              feature.setEntity1_index(evt.getEntity1_index());
              feature.setEntity2_index(evt.getEntity2_index());
              feature.setVerb_category(evt.getVerb_category());
              feature.setAssociation_type(evt.getAssociation_type());
              feature.setGeo_index(evt.getGeo_index());
              eventInCommunity.put(feature.getCommunityId(), feature);
            }
            if (!bAlreadyCountedFreq) {
              feature.setDoccount(feature.getDoccount() + 1);
            }
            if (null != evt.getEntity1_index()) {
              feature.addEntity1(evt.getEntity1_index());
            }
            if (null != evt.getEntity2_index()) {
              feature.addEntity2(evt.getEntity2_index());
            }
            if (null != evt.getVerb()) {
              feature.addVerb(evt.getVerb());
            }
            if (null != evt.getEntity1()) {
              // Restrict length of entity string, in case it's a quotation
              if (evt.getEntity1().length() > AssociationFeaturePojo.entity_MAXSIZE) {
                int i = AssociationFeaturePojo.entity_MAXSIZE;
                for (; i > AssociationFeaturePojo.entity_MAXSIZE - 10; --i) {
                  char c = evt.getEntity1().charAt(i);
                  if (c < 0x30) {
                    break;
                  }
                }
                feature.addEntity1(evt.getEntity1().substring(0, i+1));
              }
              else {
                feature.addEntity1(evt.getEntity1());
              }//TESTED (both clauses, 2.1.4.3a)
            }
            if (null != evt.getEntity2()) {
              // Restrict length of entity string, in case it's a quotation
              if (evt.getEntity2().length() > AssociationFeaturePojo.entity_MAXSIZE) {
                int i = AssociationFeaturePojo.entity_MAXSIZE;
                for (; i > AssociationFeaturePojo.entity_MAXSIZE - 10; --i) {
                  char c = evt.getEntity2().charAt(i);
                  if (c < 0x30) {
                    break;
                  }
                }
                feature.addEntity2(evt.getEntity2().substring(0, i+1));
              }
              else {
                feature.addEntity2(evt.getEntity2());
              }//TESTED (both clauses, 2.1.4.3a)
            }
          }
        }//(end loop over associations)       
      }//TESTED
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.AssociationPojo

      AssociationFeaturePojo evt = AssociationFeaturePojo.fromDb(dbo,AssociationFeaturePojo.class);
     
      // If this table has just been rebuilt from the document then the indexes are all wrong ...
      // recalculate and save
      if ('#' == evt.getIndex().charAt(0)) {
        AssociationPojo singleEvt = new AssociationPojo();
        singleEvt.setEntity1_index(evt.getEntity1_index());
        singleEvt.setEntity2_index(evt.getEntity2_index());
        singleEvt.setVerb_category(evt.getVerb_category());
        singleEvt.setGeo_index(evt.getGeo_index());
        evt.setIndex(AssociationAggregationUtils.getEventFeatureIndex(singleEvt));
        eventFeatureDB.update(new BasicDBObject("_id", dbo.get("_id")),
                      new BasicDBObject(MongoDbManager.set_,
                          new BasicDBObject(AssociationFeaturePojo.index_, evt.getIndex())), false, true);
          // (has to be a multi-update even though it's unique because it's sharded on index)
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.AssociationPojo

          // For now just re-process these into DB objects since we know that works...
          rawEventObjects = new BasicDBList();
        }
        for ( JsonNode eventNode : eventNodes )
        {         
          AssociationPojo event = parseEvent(eventNode);
          //remove useless events (an event is useless if it only has a verb (guessing currently)
          if ( null != event )
          {
            event = removeUselessEvents(event);
            if ( null != event )
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.AssociationPojo

   * @param current_node
   * @return
   */
  public AssociationPojo parseEvent(JsonNode current_node)
  {
    AssociationPojo ep = null;
    //handle the different types on entities
    String entity_type = current_node.get("_type").getTextValue().toLowerCase();
    String curr_ent;
    //find eventschema for this type if one exists
    EventSchemaPojo esp = eventSchemas.get(entity_type);
    if ( esp != null )
    {
      ep = new AssociationPojo();
      //entity 1
      if ( null != esp.entity1column && null != current_node.get(esp.entity1column) )
      {
        JsonNode ent1node = current_node.get(esp.entity1column);
        if ( ent1node.isArray() )
        {
          Iterator<JsonNode> entiter = ent1node.getElements();
          curr_ent = entiter.next().getTextValue().toLowerCase();
          EntityPojo matchEnt1 = findMappedEntityName(curr_ent);
          if ( null != matchEnt1)
          {
            ep.setEntity1(matchEnt1.getActual_name());
            ep.setEntity1_index(createEntityIndex(matchEnt1));
            if ( ep.getGeotag() == null && matchEnt1.getGeotag() != null) //try to set geotag if it already hasn't been
              ep.setGeotag(matchEnt1.getGeotag().deepCopy());
          }
          else
            ep.setEntity1(curr_ent);         
         
          if ( entiter.hasNext())
          {
            curr_ent = entiter.next().getTextValue().toLowerCase();
            EntityPojo matchEnt12 = findMappedEntityName(curr_ent);
            if ( null != matchEnt12 )
            {
              ep.setEntity2(matchEnt12.getActual_name());
              ep.setEntity2_index(createEntityIndex(matchEnt12));
              if ( ep.getGeotag() == null && matchEnt12.getGeotag() != null) //try to set geotag if it already hasn't been
                ep.setGeotag(matchEnt12.getGeotag().deepCopy());
            }
            else
              ep.setEntity2(curr_ent);           
          }
        }
        else
        {
          curr_ent = current_node.get(esp.entity1column).getTextValue().toLowerCase();
          EntityPojo matchEnt1Only = findMappedEntityName(curr_ent);
          if ( null != matchEnt1Only )
          {
            ep.setEntity1(matchEnt1Only.getActual_name());
            ep.setEntity1_index(createEntityIndex(matchEnt1Only));
            if ( ep.getGeotag() == null && matchEnt1Only.getGeotag() != null ) //try to set geotag if it already hasn't been
              ep.setGeotag(matchEnt1Only.getGeotag().deepCopy());
          }
          else
            ep.setEntity1(curr_ent);         
        }
      }     
      //entity 2     
      if ( null != esp.entity2column && null != current_node.get(esp.entity2column)  )
      {
        JsonNode ent2node = current_node.get(esp.entity2column);
        if ( ent2node.isTextual() )
        {
          curr_ent = current_node.get(esp.entity2column).getTextValue().toLowerCase();
          EntityPojo matchEnt2 = findMappedEntityName(curr_ent);
          if ( null != matchEnt2 )
          {
            ep.setEntity2(matchEnt2.getActual_name());
            ep.setEntity2_index(createEntityIndex(matchEnt2));
            if ( ep.getGeotag() == null && matchEnt2.getGeotag() != null ) //try to set geotag if it already hasn't been
              ep.setGeotag(matchEnt2.getGeotag().deepCopy());
          }
          else
            ep.setEntity2(curr_ent);
        }
      }
      //verb and verb category (if there is a verb cat, assign that and then get column value)
      if ( null != esp.verbcategory )
      {
        ep.setVerb_category(esp.verbcategory);
       
        if ( null != esp.verbcolumn && null != current_node.get(esp.verbcolumn) )
        {
          JsonNode verbnode = current_node.get(esp.verbcolumn);
          if ( verbnode.isTextual() )
          {
            ep.setVerb(current_node.get(esp.verbcolumn).getTextValue().toLowerCase());
            EntityPojo verbent = findMappedEntityName(ep.getVerb());
            if ( verbent != null )
              ep.setVerb(verbent.getActual_name());
          }
        }
      }
      else if ( null != esp.verbcolumn && null != current_node.get(esp.verbcolumn) )
      {
        ep.setVerb(current_node.get(esp.verbcolumn).getTextValue().toLowerCase());
      }
      //location
      if ( null != esp.locationcolumn && null != current_node.get(esp.locationcolumn) )
      {
        curr_ent = current_node.get(esp.locationcolumn).getTextValue().toLowerCase();
        EntityPojo geoEnt = findMappedEntityName(curr_ent);
        if ( geoEnt != null && geoEnt.getGeotag() != null )
        {
          ep.setGeo_index(createEntityIndex(geoEnt));       
          ep.setGeotag(geoEnt.getGeotag().deepCopy()); //location always over-rides geotag location
        }
      }     
      //time
      if ( null != esp.timecolumnstart && null != current_node.get(esp.timecolumnstart) )
      {
        curr_ent = current_node.get(esp.timecolumnstart).getTextValue().toLowerCase();       
        if ( null != curr_ent )
        {
          ep.setTime_start(standardizeTime(curr_ent));
          //System.out.println(current_node);
          //add some time parsing to get ranges if possible 
          if ( null != esp.timecolumnend && null != current_node.get(esp.timecolumnend) )
          {
            curr_ent = current_node.get(esp.timecolumnend).getTextValue().toLowerCase();
            String[] times = new String[2];
            times[0] = ep.getTime_start();
            times[1] = curr_ent;
            parseEndDate(times);
            ep.setTime_start(times[0]);
            ep.setTime_end(times[1]);
          }
        }
      }
      //remove geotag if it does not have loc
      if ( ep.getGeotag() != null && ep.getGeotag().lon == null)
        ep.setGeotag(null);
      ep.setAssociation_type(getEventType(ep));
    }
    else
    {
      // It's OK just to use the log for this, at some point could consider passing in HarvestContext
      // so could use the per source logger
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.AssociationPojo

              // Create an association from the AssociationSpecPojo and document
              JSONObject savedIterator = _iterator; // (just in case this needs to be retained - i don't think it does)
              if (null != _scriptEngine) { // (in case no script engine specified)
                _iterator = currIt;
              }
              AssociationPojo association = getAssociation(newAssoc, null, null, f);
              if (association != null) associations.add(association);
              _iterator = savedIterator;
            }
            //TESTED (including the ${$} escaping)
          }
        }
        // END - Multiplicative/Additive Association Creation

        //
        else if (null != currObj) // Single field iterateOver
        {
          try
          {
            // Check to see if the arrayRoot specified exists in the current doc before proceeding
            // Get array of association records from the specified root element
           
            Object itEl = null;
            try {
              itEl = currObj.get(iterateOver);
            }
            catch (JSONException e) {} // carry on, trapped below...
           
            if (null == itEl) {
              return associations;
            }
            JSONArray assocRecords = null;
            try {
              assocRecords = currObj.getJSONArray(iterateOver);
            }
            catch (JSONException e) {} // carry on, trapped below...
             
            if (null == assocRecords) {
              assocRecords = new JSONArray();
              assocRecords.put(itEl);
            }
            //TESTED           

            // Get the type of object contained in assocRecords[0]
            if (assocRecords.length() > 0) {
              String objType = assocRecords.get(0).getClass().toString();

              // EntityRecords is a simple String[] array of associations
              if (objType.equalsIgnoreCase("class java.lang.String"))
              {
                // Iterate over array elements and extract associations
                for (int i = 0; i < assocRecords.length(); ++i)
                {
                  String field = assocRecords.getString(i);
                  long nIndex = Long.valueOf(i);
                 
                  if (null != esp.getVerb_category()) { // (ie a mandatory field is present)                   
                    AssociationPojo association = getAssociation(esp, field, nIndex, f);
                    if (association != null) associations.add(association);
                  }//TESTED
                 
                  // Does the association break out into multiple associations?
                  if (esp.getAssociations() != null)
                  {
                    // Iterate over the associations and call getAssociations recursively
                    for (AssociationSpecPojo subEsp : esp.getAssociations())
                    { 
                      if (null != subEsp.getIterateOver()) {
                        if (null == subEsp.getCreationCriteriaScript()) {
                          _context.getHarvestStatus().logMessage(new StringBuffer("In iterator ").
                              append(esp.getIterateOver()).append(", trying to loop over field '").
                              append(subEsp.getIterateOver()).append("' in array of primitives.").toString(), true);
                        }
                        else {
                          this.executeEntityAssociationValidation(subEsp.getCreationCriteriaScript(), field, Long.toString(nIndex));
                        }
                        // (any creation criteria script indicates user accepts it can be either)
                      }
                      if (null != subEsp.getVerb_category()) { // (ie a mandatory field is present)                   
                        AssociationPojo association = getAssociation(subEsp, field, nIndex, f);
                        if (association != null) associations.add(association);
                      }
                    }                   
                  }//TESTED (error case)
                }
              }

              // EntityRecords is a JSONArray
              else if (objType.equalsIgnoreCase("class org.json.JSONObject"))
              {
                // Iterate over array elements and extract associations
                for (int i = 0; i < assocRecords.length(); ++i)
                {
                  // Get JSONObject containing association fields and pass assocElement
                  // into the script engine so scripts can access it
                  JSONObject savedIterator = null;
                  if (_scriptEngine != null)
                  {
                    _iterator = savedIterator = assocRecords.getJSONObject(i);
                  }

                  if (null != esp.getVerb_category()) { // (ie a mandatory field is present)                   
                    AssociationPojo association = getAssociation(esp, null, Long.valueOf(i), f);
                    if (association != null) associations.add(association)
                  }//TESTED
                 
                  // Does the association break out into multiple associations?
                  if (esp.getAssociations() != null)
                  {
                    // Iterate over the associations and call getAssociations recursively
                    for (AssociationSpecPojo subEsp : esp.getAssociations())
                    { 
                      _iterator = savedIterator; // (reset this)
                     
                      List<AssociationPojo> subAssocs = getAssociations(subEsp, f, _iterator);
                      for (AssociationPojo e : subAssocs)
                      {
                        associations.add(e);
                      }
                    }
                  }
                 
                }//(else if is json object)
              }//(end if >0 array elements)

              if (_iterator != currObj) { // top level
                _iterator = null;
              }
            }
          }
          catch (Exception e)
          {
            //System.out.println(e.getMessage());
            //DEBUG (don't output log messages per doc)
            //logger.error("Exception: " + e.getMessage(), e);
          }
        }
      }

      //
      else // No iterate over at all
      {
        AssociationPojo association = getAssociation(esp, null, null, f);
        if (association != null) associations.add(association);
      }     

      return associations;
    }
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.AssociationPojo

  private AssociationPojo getAssociation(AssociationSpecPojo esp, String field, Long count, DocumentPojo f)
  {
    String index = (count != null) ? count.toString() : null;
    try
    {
      AssociationPojo e = new AssociationPojo();
     
      // If the AssociationSpecPojo has a creation criteria script check the association for validity
      if (esp.getCreationCriteriaScript() != null && JavaScriptUtils.containsScript(esp.getCreationCriteriaScript()))
      {
        boolean addAssoc = executeEntityAssociationValidation(esp.getCreationCriteriaScript(), field, index);
        if (!addAssoc) {
          return null;
        }
      }     
     
      boolean bDontResolveToIndices = false; // (can always override to summary)
      if (null != esp.getAssoc_type() && (esp.getAssoc_type().equalsIgnoreCase("summary"))) {
        bDontResolveToIndices = true;
      }

      // Assoc.entity1
      if ((esp.getEntity1() != null) || (esp.getEntity1_index() != null))
      {
        // Association.entity1_index
        if (esp.getEntity1_index() != null)
        {
          if (JavaScriptUtils.containsScript(esp.getEntity1_index()))
          {
            String s = (String)getValueFromScript(esp.getEntity1_index(), field, index);
            if (null != s) e.setEntity1_index(s.toLowerCase());
          }
          else
          {
            if ((_iterator != null) && (esp.getEntity1_index().startsWith("$metadata.") || esp.getEntity1_index().startsWith("${metadata."))) {
              if (_context.isStandalone()) { // (minor message, while debugging only)
                _context.getHarvestStatus().logMessage("Warning: in entity1_index, using global $metadata when iterating", true);
              }
            }
            String s = getFormattedTextFromField(esp.getEntity1_index(), field);
            if (null != s) e.setEntity1_index(s.toLowerCase());
          }
          if (null != e.getEntity1_index()) { // Convert to entity1
            int nTypeIndex = e.getEntity1_index().lastIndexOf('/');
            if (nTypeIndex > 0) {
              e.setEntity1(e.getEntity1_index().substring(0, nTypeIndex));
              if (!_entityMap.contains(e.getEntity1_index())) { // Needs to correlate with an entity
                StringBuffer error =  new StringBuffer("Failed to correlate entity1_index with: ").append(esp.getEntity1_index());
                if (_context.isStandalone()) {
                  error.append(" using ").append(e.getEntity1_index());                 
                }
                _context.getHarvestStatus().logMessage(error.toString(), true);
                e.setEntity1_index(null);             
              }//TESTED (INF1360_test_source.json:test8)
            }
            else { // index must be malformed
              StringBuffer error =  new StringBuffer("Malformed entity1_index with: ").append(esp.getEntity1_index());
              if (_context.isStandalone()) {
                error.append(" using ").append(e.getEntity1_index());                 
              }
              _context.getHarvestStatus().logMessage(error.toString(), true);
              e.setEntity1_index(null);
            }
          }
        }//TESTED (see INF1360_test_source.json:test2)
       
        // entity1       
        if (null != esp.getEntity1()) {
         
          if (JavaScriptUtils.containsScript(esp.getEntity1()))
          {
            e.setEntity1((String)getValueFromScript(esp.getEntity1(), field, index));
          }
          else
          {
            if ((_iterator != null) && (esp.getEntity1().startsWith("$metadata.") || esp.getEntity1().startsWith("${metadata."))) {
              if (_context.isStandalone()) { // (minor message, while debugging only)
                _context.getHarvestStatus().logMessage("Warning: in entity1, using global $metadata when iterating", true);
              }
            }
            e.setEntity1(getFormattedTextFromField(esp.getEntity1(), field));
          }
         
          if (!bDontResolveToIndices && (null == e.getEntity1_index()))
          {
            // Try using the entity.disambiguated name, this isn't perfect because 2 entities with different
            // types can have different dnames, but we'll try and then abandon if we get multiple hits
            int nHits = 0;
            String matchingIndex = null;
            for (EntityPojo entity : f.getEntities())
            {
              if (entity.getDisambiguatedName().equalsIgnoreCase(e.getEntity1()))
              {
                nHits++;
                if (1 == nHits) {
                  matchingIndex = entity.getIndex();
                  e.setEntity1_index(entity.getIndex());
                }
                else if (!matchingIndex.equals(entity.getIndex())) { // Ambiguous reference so bail out
                  StringBuffer error =  new StringBuffer("Failed entity1_index disambiguation with: ").append(esp.getEntity1());
                  if (_context.isStandalone()) {
                    error.append(" using ").append(e.getEntity1());                 
                  }
                  _context.getHarvestStatus().logMessage(error.toString(), true);

                  e.setEntity1_index(null);
                  break;
                }
              }
            } // (end loop across all indices)
          }//TESTED (success and fail cases, see INF1360_test_source.json:test3)
         
        } // (end no entity1_index extracted, entity1 specified)
       
        // Quality checks:
       
        if ((esp.getEntity1() != null) && (null == e.getEntity1()) && (null == esp.getCreationCriteriaScript())) {
          // Specified this (entity1), so going to insist on it
          if (_context.isStandalone()) { // (minor message, while debugging only)
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required entity1 from: ").append(esp.getEntity1()).toString(), true);
          }
          return null;
        }
        if ((esp.getEntity1_index() != null) && (null == e.getEntity1_index()) && (null == esp.getCreationCriteriaScript())) {
          // Specified this (entity1_index), so going to insist on it
          if (_context.isStandalone()) { // (minor message, while debugging only)
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required entity1_index from: ").append(esp.getEntity1_index()).toString(), true);
          }
          return null;
        }
        //TESTED INF1360_test_source:test7 (no criteria), test8 (criteria)
       
      } // (end entity1)
     
      // Assoc.entity2
      if ((esp.getEntity2() != null) || (esp.getEntity2_index() != null))
      {
        // Association.entity2_index
        if (esp.getEntity2_index() != null)
        {
          if (JavaScriptUtils.containsScript(esp.getEntity2_index()))
          {
            String s = (String)getValueFromScript(esp.getEntity2_index(), field, index);
            if (null != s) e.setEntity2_index(s.toLowerCase());
          }
          else
          {
            if ((_iterator != null) && (esp.getEntity2_index().startsWith("$metadata.") || esp.getEntity2_index().startsWith("${metadata."))) {
              if (_context.isStandalone()) { // (minor message, while debugging only)
                _context.getHarvestStatus().logMessage("Warning: in entity2_index, using global $metadata when iterating", true);
              }
            }
            String s = getFormattedTextFromField(esp.getEntity2_index(), field);
            if (null != s) e.setEntity2_index(s.toLowerCase());
          }
          if (null != e.getEntity2_index()) { // Convert to entity2
            int nTypeIndex = e.getEntity2_index().lastIndexOf('/');
            if (nTypeIndex > 0) {
              e.setEntity2(e.getEntity2_index().substring(0, nTypeIndex));
              if (!_entityMap.contains(e.getEntity2_index())) { // Needs to correlate with an entity
                StringBuffer error =  new StringBuffer("Failed to correlate entity2_index with: ").append(esp.getEntity2_index());
                if (_context.isStandalone()) {
                  error.append(" using ").append(e.getEntity2_index());                 
                }
                _context.getHarvestStatus().logMessage(error.toString(), true);
                e.setEntity2_index(null);             
              }//TESTED (INF1360_test_source.json:test8)
            }
            else { // index must be malformed
              StringBuffer error =  new StringBuffer("Malformed entity2_index with: ").append(esp.getEntity2_index());
              if (_context.isStandalone()) {
                error.append(" using ").append(e.getEntity2_index());                 
              }
              _context.getHarvestStatus().logMessage(error.toString(), true);
              e.setEntity2_index(null);
            }
          }
        }//TESTED (see INF1360_test_source.json:test2)
       
        // entity2       
        if (null != esp.getEntity2()) {
         
          if (JavaScriptUtils.containsScript(esp.getEntity2()))
          {
            e.setEntity2((String)getValueFromScript(esp.getEntity2(), field, index));
          }
          else
          {
            if ((_iterator != null) && (esp.getEntity2().startsWith("$metadata.") || esp.getEntity2().startsWith("${metadata."))) {
              if (_context.isStandalone()) { // (minor message, while debugging only)
                _context.getHarvestStatus().logMessage("Warning: in entity2, using global $metadata when iterating", true);
              }
            }
            e.setEntity2(getFormattedTextFromField(esp.getEntity2(), field));
          }
         
          if (!bDontResolveToIndices && (null == e.getEntity2_index()))
          {
            // Try using the entity.disambiguated name, this isn't perfect because 2 entities with different
            // types can have different dnames, but we'll try and then abandon if we get multiple hits
            int nHits = 0;
            String matchingIndex = null;
            for (EntityPojo entity : f.getEntities())
            {
              if (entity.getDisambiguatedName().equalsIgnoreCase(e.getEntity2()))
              {
                nHits++;
                if (1 == nHits) {
                  matchingIndex = entity.getIndex();
                  e.setEntity2_index(entity.getIndex());
                }
                else if (!matchingIndex.equals(entity.getIndex())) { // Ambiguous reference so bail out
                  StringBuffer error =  new StringBuffer("Failed entity2_index disambiguation with: ").append(esp.getEntity2());
                  if (_context.isStandalone()) {
                    error.append(" using ").append(e.getEntity2());                 
                  }
                  _context.getHarvestStatus().logMessage(error.toString(), true);
                 
                  e.setEntity2_index(null);
                  break;
                }
              }
            } // (end loop across all indices)
          }//TESTED (success and fail cases, see INF1360_test_source.json:test3)
         
        } // (end no entity2_index extracted, entity2 specified)
       
        // Quality checks:
       
        if ((esp.getEntity2() != null) && (null == e.getEntity2()) && (null == esp.getCreationCriteriaScript())) {
          // Specified this (entity2), so going to insist on it
          if (_context.isStandalone()) { // (minor message, while debugging only)
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required entity2 from: ").append(esp.getEntity2()).toString(), true);
          }
          return null;
        }
        if ((esp.getEntity2_index() != null) && (null == e.getEntity2_index()) && (null == esp.getCreationCriteriaScript())) {
          // Specified this (entity2_index), so going to insist on it
          if (_context.isStandalone()) { // (minor message, while debugging only)
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required entity2_index from: ").append(esp.getEntity2_index()).toString(), true);
          }
          return null;
        }
        //TESTED INF1360_test_source:test7 (no criteria), test8 (criteria)
       
      } // (end entity2)
     
      // Association.verb
      if (esp.getVerb() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getVerb()))
        {
          e.setVerb((String)getValueFromScript(esp.getVerb(), field, index));
        }
        else
        {
          e.setVerb(getFormattedTextFromField(esp.getVerb(), field));
        }
        if ((null == e.getVerb()) && (null == esp.getCreationCriteriaScript())) {
          // Specified this, so going to insist on it
          if (_context.isStandalone()) { // (minor message, while debugging only)
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required verb from: ").append(esp.getVerb()).toString(), true);
          }
          return null;
        }
      }
     
      // Association.verb_category
      if (esp.getVerb_category() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getVerb_category()))
        {
          String s = (String)getValueFromScript(esp.getVerb_category(), field, index);
          if (null != s) e.setVerb_category(s.toLowerCase());
        }
        else
        {
          String s = getFormattedTextFromField(esp.getVerb_category(), field);
          if (null != s) e.setVerb_category(s.toLowerCase());
        }
      }
      if (null == e.getVerb_category()) { // Needed: verb category (get from verb if not specified)
        _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required verb_category from: ").append(esp.getVerb_category()).toString(), true);
        return null;
      }
      if (null == e.getVerb()) { // set from verb cat
        e.setVerb(e.getVerb_category());
      }
     
      // Entity.start_time
      if (esp.getTime_start() != null)
      {
        String startTimeString = null;
        if (JavaScriptUtils.containsScript(esp.getTime_start()))
        {
          startTimeString = (String)getValueFromScript(esp.getTime_start(), field, index);
        }
        else
        {
          startTimeString = getFormattedTextFromField(esp.getTime_start(), field);
        }
        if (null != startTimeString) {
          e.setTime_start(DateUtility.getIsoDateString(startTimeString));
        }
        // Allow this to be intrinsically optional
      }
     
      // Entity.end_time
      if (esp.getTime_end() != null)
      {   
        String endTimeString = null;
        if (JavaScriptUtils.containsScript(esp.getTime_end()))
        {
          endTimeString = (String)getValueFromScript(esp.getTime_end(), field, index);
        }
        else
        {
          endTimeString = getFormattedTextFromField(esp.getTime_end(), field);
        }
        if (null != endTimeString) {
          e.setTime_end(DateUtility.getIsoDateString(endTimeString));
        }
        // Allow this to be intrinsically optional
      }
     
     
      // Entity.geo_index
      if (esp.getGeo_index() != null)
      {       
        String geo_entity = null;
        if (JavaScriptUtils.containsScript(esp.getGeo_index()))
        {
          geo_entity = (String)getValueFromScript(esp.getGeo_index(), field, index);
        }
        else
        {
          if ((_iterator != null) && (esp.getGeo_index().startsWith("$metadata.") || esp.getGeo_index().startsWith("${metadata."))) {
            if (_context.isStandalone()) { // (minor message, while debugging only)
              _context.getHarvestStatus().logMessage("Warning: in geo_index, using global $metadata when iterating", true);
            }
          }
          geo_entity = getFormattedTextFromField(esp.getGeo_index(), field);
        }
        if (null != geo_entity) {
          geo_entity = geo_entity.toLowerCase();
          if (geo_entity.lastIndexOf('/') < 0) {
            StringBuffer error =  new StringBuffer("Malformed entity2_index with: ").append(esp.getGeo_index());
            if (_context.isStandalone()) {
              error.append(" using ").append(geo_entity);                 
            }
            _context.getHarvestStatus().logMessage(error.toString(), true);

            geo_entity = null;
          }
          if (!_entityMap.contains(geo_entity)) {
            StringBuffer error =  new StringBuffer("Failed to disambiguate geo_index with: ").append(esp.getGeo_index());
            if (_context.isStandalone()) {
              error.append(" using ").append(geo_entity);                 
            }
            _context.getHarvestStatus().logMessage(error.toString(), true);

            geo_entity = null;           
          }
          //TESTED (INF1360_test_source:test4b)
        }
        //TESTED (INF1360_test_source:test4, test5, test6)
       
        if (null != geo_entity) e.setGeo_index(geo_entity);
        GeoPojo s1 = _geoMap.get(geo_entity);
        e.setGeotag(s1);
        //TESTED (INF1360_test_source:test4)
       
        // Allow this to be intrinsically optional
      }
     
      // Get geo information based on geo tag
      if (e.getGeotag() == null)
      {
        // Extract association geoTag if it exists in the association
        if (esp.getGeotag() != null)
        { 
          e.setGeotag(getEntityGeo(esp.getGeotag(), null, field));
        }
        // Otherwise search geoMap on index (entity1_index, entity2_index) for a geoTag
        else
        {
          if (e.getEntity1_index() != null || e.getEntity2_index() != null)
          {
            GeoPojo s1 = _geoMap.get(e.getEntity1_index());
            if (s1 != null)
            {
              e.setGeotag(s1);
              e.setGeo_index(e.getEntity1_index());
            }
            else {
              GeoPojo s2 = _geoMap.get(e.getEntity2_index());
              if (s2 != null)
              {
                e.setGeotag(s2);
                e.setGeo_index(e.getEntity2_index());
              }
            }
          }
        }
        // Allow this to be intrinsically optional
      }

      // If all the indexes are null don't add the association
      if (e.getEntity1_index() == null && e.getEntity2_index() == null && e.getGeo_index() == null) {
        if (bDontResolveToIndices  && _context.isStandalone()) { // (minor message, while debugging only)
          _context.getHarvestStatus().logMessage("Warning: for summaries, at least one entity must be manually specified as an index", true);
        }
        return null;
      }
     
      // Calculate association type
      if (bDontResolveToIndices) {
        e.setAssociation_type("Summary");       
      }
      else {       
        e.setAssociation_type(AssociationUtils.getAssocType(e));
        if (null != esp.getAssoc_type()) {
          if (!e.getAssociation_type().equals("Summary")) {
            // Allowed to switch event<->fact
            if (esp.getAssoc_type().equalsIgnoreCase("fact")) {
              e.setAssociation_type("Fact");                     
            }
            else if (esp.getAssoc_type().equalsIgnoreCase("event")) {
              e.setAssociation_type("Event");                                   
            }
          }
        }
      }
     
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.AssociationPojo

            pxPipe.featureEngine.assocRegex = Pattern.compile(toRegex, Pattern.CASE_INSENSITIVE);
          }//TOTEST
        }
        Iterator<AssociationPojo> it = doc.getAssociations().iterator();
        while (it.hasNext()) {
          AssociationPojo assoc = it.next();
          boolean removed = false;
          boolean matched = (null == pxPipe.featureEngine.assocRegex); // (ie always match if no regex spec'd)
          for (String index: Arrays.asList(assoc.getEntity1_index(), assoc.getEntity2_index(), assoc.getGeo_index())) {
            if (null != index) {
              if ((null != removedEntities) && removedEntities.contains(index)) {
                it.remove();
                removed = true;
                break;
              }//TOTEST (hand tested, script TBD)
              if (null != pxPipe.featureEngine.assocRegex) {
                boolean remove = false;
                if (pxPipe.featureEngine.assocRegex.matcher(index).find()) { // found
                  matched = true;
                  remove = !includeOnly;
                }//TOTEST (hand tested, script TBD)
                if (remove) { // exclude
                  it.remove();
                  removed = true;
                  break;
                }//TOTEST (hand tested, script TBD)           
              }
            }//(end if index present)
          }//(end loop over indexes)
          if (removed) {
            continue;
          }
          // Verb cat:
          if ((null != pxPipe.featureEngine.assocRegex) && (null != assoc.getVerb_category())) {
            boolean remove = false;
            if (pxPipe.featureEngine.assocRegex.matcher(assoc.getVerb_category()).find()) { // found
              matched = true;
              remove = !includeOnly;
            }//TOTEST (hand tested, script TBD)
            if (remove) { // exclude
              it.remove();
              continue;
            }//TOTEST (hand tested, script TBD)           
          }
          // Verb
          if ((null != pxPipe.featureEngine.assocRegex) && (null != assoc.getVerb())) {
            boolean remove = false;
            if (pxPipe.featureEngine.assocRegex.matcher(assoc.getVerb()).find()) { // found
              matched = true;
              remove = !includeOnly;
            }//TOTEST (hand tested, script TBD)
            if (remove) { // exclude
              it.remove();
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.AssociationPojo

      } // (end if filter entities out)
     
      if (null != doc.getAssociations()) {
        Iterator<AssociationPojo> it = doc.getAssociations().iterator();
        while (it.hasNext()) {
          AssociationPojo evt = it.next();
          try
          {
            // Apply association index filter, if it exists
            if (null != filter) {
              Pattern whichRegex = null;
              String whichPattern = null;
              if ((null != evt.getGeotag()) && (null != filter.assocGeoFilterRegex)) {
                whichRegex = filter.assocGeoFilterRegex;
                whichPattern = filter.assocGeoFilter;
              }
              else {
                whichRegex = filter.assocFilterRegex;
                whichPattern = filter.assocFilter;               
              } // (end which regex to pick)
              if (null != whichRegex) {
                if (whichPattern.startsWith("-")) {
                  if (whichRegex.matcher(AssociationPojoIndexMap.serialize(evt)).find()) {
                    it.remove();
                    continue;
                  }
                }
                else if (!whichRegex.matcher(AssociationPojoIndexMap.serialize(evt)).find()) {
                  it.remove();
                  continue;
                }
               
              } // (end if regex exists)
            }
            //TESTED: positive and negative associations, normal only (geo from cut and paste from entities)
           
            // Add event date ranges month by month (eg for histograms)
            if (null != evt.getTime_start()) {
              Date d1 = sdf.parse(evt.getTime_start());
              if (null == doc.getMonths()) {
                doc.setMonths(new TreeSet<Integer>());
              }
 
              if (null == evt.getTime_end()) {
                Calendar c = Calendar.getInstance();
                c.setTime(d1);
                doc.getMonths().add(c.get(Calendar.YEAR)*100 + c.get(Calendar.MONTH)+1);
 
              }
              else {
                Date d2 = sdf.parse(evt.getTime_end());
                Calendar c = Calendar.getInstance();
                c.setTime(d1);
 
                int nStartYr = c.get(Calendar.YEAR);
                int nStartMo = c.get(Calendar.MONTH)+1;
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.