Examples of EntityPojo


Examples of com.ikanow.infinit.e.data_model.store.document.EntityPojo

        JsonNode ent1node = current_node.get(esp.entity1column);
        if ( ent1node.isArray() )
        {
          Iterator<JsonNode> entiter = ent1node.getElements();
          curr_ent = entiter.next().getTextValue().toLowerCase();
          EntityPojo matchEnt1 = findMappedEntityName(curr_ent);
          if ( null != matchEnt1)
          {
            ep.setEntity1(matchEnt1.getActual_name());
            ep.setEntity1_index(createEntityIndex(matchEnt1));
            if ( ep.getGeotag() == null && matchEnt1.getGeotag() != null) //try to set geotag if it already hasn't been
              ep.setGeotag(matchEnt1.getGeotag().deepCopy());
          }
          else
            ep.setEntity1(curr_ent);         
         
          if ( entiter.hasNext())
          {
            curr_ent = entiter.next().getTextValue().toLowerCase();
            EntityPojo matchEnt12 = findMappedEntityName(curr_ent);
            if ( null != matchEnt12 )
            {
              ep.setEntity2(matchEnt12.getActual_name());
              ep.setEntity2_index(createEntityIndex(matchEnt12));
              if ( ep.getGeotag() == null && matchEnt12.getGeotag() != null) //try to set geotag if it already hasn't been
                ep.setGeotag(matchEnt12.getGeotag().deepCopy());
            }
            else
              ep.setEntity2(curr_ent);           
          }
        }
        else
        {
          curr_ent = current_node.get(esp.entity1column).getTextValue().toLowerCase();
          EntityPojo matchEnt1Only = findMappedEntityName(curr_ent);
          if ( null != matchEnt1Only )
          {
            ep.setEntity1(matchEnt1Only.getActual_name());
            ep.setEntity1_index(createEntityIndex(matchEnt1Only));
            if ( ep.getGeotag() == null && matchEnt1Only.getGeotag() != null ) //try to set geotag if it already hasn't been
              ep.setGeotag(matchEnt1Only.getGeotag().deepCopy());
          }
          else
            ep.setEntity1(curr_ent);         
        }
      }     
      //entity 2     
      if ( null != esp.entity2column && null != current_node.get(esp.entity2column)  )
      {
        JsonNode ent2node = current_node.get(esp.entity2column);
        if ( ent2node.isTextual() )
        {
          curr_ent = current_node.get(esp.entity2column).getTextValue().toLowerCase();
          EntityPojo matchEnt2 = findMappedEntityName(curr_ent);
          if ( null != matchEnt2 )
          {
            ep.setEntity2(matchEnt2.getActual_name());
            ep.setEntity2_index(createEntityIndex(matchEnt2));
            if ( ep.getGeotag() == null && matchEnt2.getGeotag() != null ) //try to set geotag if it already hasn't been
              ep.setGeotag(matchEnt2.getGeotag().deepCopy());
          }
          else
            ep.setEntity2(curr_ent);
        }
      }
      //verb and verb category (if there is a verb cat, assign that and then get column value)
      if ( null != esp.verbcategory )
      {
        ep.setVerb_category(esp.verbcategory);
       
        if ( null != esp.verbcolumn && null != current_node.get(esp.verbcolumn) )
        {
          JsonNode verbnode = current_node.get(esp.verbcolumn);
          if ( verbnode.isTextual() )
          {
            ep.setVerb(current_node.get(esp.verbcolumn).getTextValue().toLowerCase());
            EntityPojo verbent = findMappedEntityName(ep.getVerb());
            if ( verbent != null )
              ep.setVerb(verbent.getActual_name());
          }
        }
      }
      else if ( null != esp.verbcolumn && null != current_node.get(esp.verbcolumn) )
      {
        ep.setVerb(current_node.get(esp.verbcolumn).getTextValue().toLowerCase());
      }
      //location
      if ( null != esp.locationcolumn && null != current_node.get(esp.locationcolumn) )
      {
        curr_ent = current_node.get(esp.locationcolumn).getTextValue().toLowerCase();
        EntityPojo geoEnt = findMappedEntityName(curr_ent);
        if ( geoEnt != null && geoEnt.getGeotag() != null )
        {
          ep.setGeo_index(createEntityIndex(geoEnt));       
          ep.setGeotag(geoEnt.getGeotag().deepCopy()); //location always over-rides geotag location
        }
      }     
      //time
      if ( null != esp.timecolumnstart && null != current_node.get(esp.timecolumnstart) )
      {
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.EntityPojo

    List<EntityPojo> ents = new ArrayList<EntityPojo>();
    if ( sc.keywords != null)
    {
      for ( AlchemyKeywordPojo ae : sc.keywords)
      {
        EntityPojo ent = convertAlchemyKeywordToEntPojo(ae);
        if ( ent != null )
          ents.add(ent);
      }
    }
    return ents; 
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.EntityPojo

 
  private static EntityPojo convertAlchemyKeywordToEntPojo(AlchemyKeywordPojo pojoToConvert)
  {
    try
    {
      EntityPojo ent = new EntityPojo();
      ent.setActual_name(pojoToConvert.text);
      ent.setType("Keyword");
      ent.setRelevance(Double.parseDouble(pojoToConvert.relevance));
      ent.setFrequency(1L);
      if (null != pojoToConvert.sentiment) {
        if (null != pojoToConvert.sentiment.score) {
          ent.setSentiment(Double.parseDouble(pojoToConvert.sentiment.score));
        }
        else { // neutral
          ent.setSentiment(0.0);
        }
      }
      // (else no sentiment present)
     
      ent.setDisambiguatedName(pojoToConvert.text);
      ent.setActual_name(pojoToConvert.text);
     
      ent.setDimension(EntityPojo.Dimension.What);
      return ent;
    }
    catch (Exception ex)
    {
      logger.error("Line: [" + ex.getStackTrace()[2].getLineNumber() + "] " + ex.getMessage());
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.EntityPojo

          {             
            String field = entityRecords.getString(i);
            long nIndex = Long.valueOf(i);
           
            if (null != esp.getType()) { // (else cannot be a valid entity, must just be a list)
              EntityPojo entity = getEntity(esp, field, String.valueOf(i), f);
              if (entity != null) entities.add(entity)
            }
           
            // Does the association break out into multiple associations?
            if (esp.getEntities() != null)
            {
              // Iterate over the associations and call getAssociations recursively
              for (EntitySpecPojo subEsp : esp.getEntities())
              { 
                if (null != subEsp.getIterateOver()) {
                  if (null == subEsp.getCreationCriteriaScript()) {
                    _context.getHarvestStatus().logMessage(new StringBuffer("In iterator ").
                        append(esp.getIterateOver()).append(", trying to loop over field '").
                        append(subEsp.getIterateOver()).append("' in array of primitives.").toString(), true);
                  }
                  else {
                    this.executeEntityAssociationValidation(subEsp.getCreationCriteriaScript(), field, Long.toString(nIndex));
                  }
                  // (any creation criteria script indicates user accepts it can be either)
                }
                if (null != subEsp.getDisambiguated_name()) {
                  EntityPojo entity = getEntity(subEsp, field, String.valueOf(i), f);
                  if (entity != null) entities.add(entity)
                }
              }                   
            }//TESTED (error case, mixed object)
          }
        }

        /*
         *  entityRecords is a JSONArray whose elements are JSONObjects
         */
        else if (objType.equalsIgnoreCase("class org.json.JSONObject"))
        {
          // Iterate over array elements and extract entities
          for (int i = 0; i < entityRecords.length(); ++i)
          {
            // Get JSONObject containing entity fields and pass entityElement
            // into the script engine so scripts can access it
            JSONObject savedIterator = null;
            if (_scriptEngine != null)
            {
              _iterator = savedIterator = entityRecords.getJSONObject(i);
            }

            if (null != esp.getType()) { // (else cannot be a valid entity, must just be a list)
              EntityPojo entity = getEntity(esp, null, String.valueOf(i), f);
              if (entity != null) entities.add(entity);
            }
           
            // Does the entity break out into multiple entities?
            if (esp.getEntities() != null)
            {
              // Iterate over the entities and call getEntities recursively
              for (EntitySpecPojo subEsp : esp.getEntities())
              { 
                _iterator = savedIterator; // (reset this)
               
                List<EntityPojo> subEntities = getEntities(subEsp, f, _iterator);
                for (EntityPojo e : subEntities)
                {
                  entities.add(e);
                }
              }
            }
          }
        }

        if (_iterator != currObj) { // (ie at the top level)
          _iterator = null;
        }
      }
      catch (Exception e)
      {
        //e.printStackTrace();
        //System.out.println(e.getMessage());
        //logger.error("Exception: " + e.getMessage());
      }
    }
   
    // Single entity
    else
    {
      // Does the entity break out into multiple entities?
      if (esp.getEntities() != null)
      {
        // Iterate over the entities and call getEntities recursively
        for (EntitySpecPojo subEsp : esp.getEntities())
        { 
          List<EntityPojo> subEntities = getEntities(subEsp, f, currObj);
          for (EntityPojo e : subEntities)
          {
            entities.add(e);
          }
        }
      }
      else
      {
        EntityPojo entity = getEntity(esp, null, null, f);
        if (entity != null) entities.add(entity)
      }
    }
   
    return entities;
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.EntityPojo

      Matcher m = currKeywordRegex.matcher(batchedDoc.fullText);

      while (m.find()) {
       
        String name = m.group().toLowerCase();
        EntityPojo ent = currKeywordMap.get(name);
       
        if ((null != ent) && (nDoc != ent.getDoccount())) { // (see below)
          if (null == batchedDoc.doc.getEntities()) {
            batchedDoc.doc.setEntities(new ArrayList<EntityPojo>());
          }
          batchedDoc.doc.getEntities().add(ent);
          ent.setDoccount(nDoc);
            // use this as an efficient check to only add each entity once per doc
            // doccount gets overwritten by the generic processing module so fine to abuse this
        }
        // (else probably an internal logic error ie shouldn't happen)
       
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.EntityPojo

    // If the EntitySpecPojo or DocumentPojo is null return null
    if ((esp == null) || (f == null)) return null;
   
    try
    {
      EntityPojo e = new EntityPojo();
     
      // Parse creation criteria script to determine if the entity should be added
      if (esp.getCreationCriteriaScript() != null && JavaScriptUtils.containsScript(esp.getCreationCriteriaScript()))
      {
        boolean addEntity = executeEntityAssociationValidation(esp.getCreationCriteriaScript(), field, index);
        if (!addEntity) {
          return null;
        }
      }
     
      // Entity.disambiguous_name
      String disambiguatedName = null;
      if (JavaScriptUtils.containsScript(esp.getDisambiguated_name()))
      {
        disambiguatedName = (String)getValueFromScript(esp.getDisambiguated_name(), field, index);
      }
      else
      {
        if ((_iterator != null) && (esp.getDisambiguated_name().startsWith("$metadata.") || esp.getDisambiguated_name().startsWith("${metadata."))) {
          if (_context.isStandalone()) { // (minor message, while debugging only)
            _context.getHarvestStatus().logMessage("Warning: in disambiguated_name, using global $metadata when iterating", true);
          }
        }
        // Field - passed in via simple string array from getEntities
        if (field != null)
        {
          disambiguatedName = getFormattedTextFromField(esp.getDisambiguated_name(), field);
        }
        else
        {
          disambiguatedName = getFormattedTextFromField(esp.getDisambiguated_name(), field);
        }
      }
     
      // Only proceed if disambiguousName contains a meaningful value
      if (disambiguatedName != null && disambiguatedName.length() > 0)
      {
        e.setDisambiguatedName(disambiguatedName);
      }
      else // Always log failure to get a dname - to remove this, specify a creationCriteriaScript
      {
        _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required disambiguated_name from: ").append(esp.getDisambiguated_name()).toString(), true);
        return null;
      }
     
      // Entity.frequency (count)
      String freq = "1";
      if (esp.getFrequency() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getFrequency()))
        {
          freq = getValueFromScript(esp.getFrequency(), field, index).toString();
        }
        else
        {
          freq = getFormattedTextFromField(esp.getFrequency(), field);
        }
        // Since we've specified freq, we're going to enforce it
        if (null == freq) { // failed to get it
          if (null == esp.getCreationCriteriaScript()) {
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required frequency from: ").append(esp.getFrequency()).toString(), true);
            return null;
          }
        }
      }

      // Try converting the freq string value to its numeric (double) representation
      Double frequency = (double) 0;
      try 
      {
        frequency = Double.parseDouble(freq);
      }
      catch (Exception e1)
      {
        this._context.getHarvestStatus().logMessage(e1.getMessage(), true);
        return null;
      }
     
      // Only proceed if frequency > 0
      if (frequency > 0)
      {
        e.setFrequency(frequency.longValue()); // Cast to long from double
      }
      else
      {
        return null;  
     
     
      // Entity.actual_name
      String actualName = null;
      if (esp.getActual_name() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getActual_name()))
        {
          actualName = (String)getValueFromScript(esp.getActual_name(), field, index);
        }
        else
        {
          if ((_iterator != null) && (esp.getActual_name().startsWith("$metadata.") || esp.getActual_name().startsWith("${metadata."))) {
            if (_context.isStandalone()) { // (minor message, while debugging only)
              _context.getHarvestStatus().logMessage("Warning: in actual_name, using global $metadata when iterating", true);
            }
          }
          actualName = getFormattedTextFromField(esp.getActual_name(), field);
        }
        // Since we've specified actual name, we're going to enforce it (unless otherwise specified)
        if (null == actualName) { // failed to get it
          if (null == esp.getCreationCriteriaScript()) {
            if (_context.isStandalone()) { // (minor message, while debugging only)
              _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required actual_name from: ").append(esp.getActual_name()).toString(), true);
            }
            return null;
          }
        }
      }
      // If actualName == null set it equal to disambiguousName
      if (actualName == null) actualName = disambiguatedName;
      e.setActual_name(actualName);
     
      // Entity.type
      String type = null;
      if (esp.getType() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getType()))
        {
          type = (String)getValueFromScript(esp.getType(), field, index);
        }
        else
        {
          type = getFormattedTextFromField(esp.getType(), field);
        }
        // Since we've specified type, we're going to enforce it (unless otherwise specified)
        if (null == type) { // failed to get it
          if (null == esp.getCreationCriteriaScript()) {
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required type from: ").append(esp.getType()).toString(), true);
            return null;
          }
        }
      }
      else
      {
        type = "Keyword";
      }
      e.setType(type);
     
      // Entity.index
      String entityIndex = disambiguatedName + "/" + type;
      e.setIndex(entityIndex.toLowerCase());
     
      // Now check if we already exist, discard if so:
      if (_entityMap.contains(e.getIndex())) {
        return null;
      }

      // Entity.dimension
      String dimension = null;
      if (esp.getDimension() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getDimension()))
        {
          dimension = (String)getValueFromScript(esp.getDimension(), field, index);
        }
        else
        {
          dimension = getFormattedTextFromField(esp.getDimension(), field);
        }
        // Since we've specified dimension, we're going to enforce it (unless otherwise specified)
        if (null == dimension) { // failed to get it
          if (null == esp.getCreationCriteriaScript()) {
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required dimension from: ").append(esp.getDimension()).toString(), true);
            return null;
          }
        }
      }
      if (null == dimension) {
        try {
          e.setDimension(DimensionUtility.getDimensionByType(type));
        }
        catch (java.lang.IllegalArgumentException ex) {
          e.setDimension(EntityPojo.Dimension.What);                 
        }
      }
      else {
        try {
          EntityPojo.Dimension enumDimension = EntityPojo.Dimension.valueOf(dimension);
          if (null == enumDimension) {
            _context.getHarvestStatus().logMessage(new StringBuffer("Invalid dimension: ").append(dimension).toString(), true);
            return null; // (invalid dimension)
          }
          else {
            e.setDimension(enumDimension);
          }
        }
        catch (Exception e2) {
          _context.getHarvestStatus().logMessage(new StringBuffer("Invalid dimension: ").append(dimension).toString(), true);
          return null; // (invalid dimension)         
        }
      }
     
      // Entity.relevance
      String relevance = "0";
      if (esp.getRelevance() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getRelevance()))
        {
          relevance = (String)getValueFromScript(esp.getRelevance(), field, index);
        }
        else
        {
          relevance = getFormattedTextFromField(esp.getRelevance(), field);
        }
        // Since we've specified relevance, we're going to enforce it (unless otherwise specified)
        if (null == relevance) { // failed to get it
          if (null == esp.getCreationCriteriaScript()) {
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required relevance from: ").append(esp.getRelevance()).toString(), true);
            return null;
          }
        }
      }
      try {
        e.setRelevance(Double.parseDouble(relevance));
      }
      catch (Exception e1) {
        this._context.getHarvestStatus().logMessage(e1.getMessage(), true);
        return null;       
      }

      // Entity.sentiment (optional field)
      if (esp.getSentiment() != null)
      {
        String sentiment;
        if (JavaScriptUtils.containsScript(esp.getSentiment()))
        {
          sentiment = (String)getValueFromScript(esp.getSentiment(), field, index);
        }
        else
        {
          sentiment = getFormattedTextFromField(esp.getSentiment(), field);
        }
        // (sentiment is optional, even if specified)
        if (null != sentiment) {
          try {
            double d = Double.parseDouble(sentiment);
            e.setSentiment(d);
            if (null == e.getSentiment()) {
              if (_context.isStandalone()) { // (minor message, while debugging only)
                _context.getHarvestStatus().logMessage(new StringBuffer("Invalid sentiment: ").append(sentiment).toString(), true);
              }             
            }
          }
          catch (Exception e1) {
            this._context.getHarvestStatus().logMessage(e1.getMessage(), true);
            return null;       
          }
        }
      }

      // Entity Link data:
     
      if (esp.getLinkdata() != null)
      {
       
        String linkdata = null;
        if (JavaScriptUtils.containsScript(esp.getLinkdata()))
        {
          linkdata = (String)getValueFromScript(esp.getLinkdata(), field, index);
        }
        else
        {
          linkdata = getFormattedTextFromField(esp.getLinkdata(), field);
        }
        // linkdata is optional, even if specified
        if (null != linkdata) {
          String[] links = linkdata.split("\\s+");
          e.setSemanticLinks(Arrays.asList(links));
        }
      }
     
     
      // Extract Entity GEO or set Entity Geo equal to DocGeo if specified via useDocGeo
      if (esp.getGeotag() != null)
      { 
        GeoPojo geo = getEntityGeo(esp.getGeotag(), null, field);
        if (null != geo) {
          e.setGeotag(geo);
        }
        // (Allow this field to be intrinsically optional)
       
        // If no ontology type is specified, derive it from getEntityGeo:
        if (null == esp.getOntology_type()) {
          esp.setOntology_type(esp.getGeotag().getOntology_type());
        }
      }
      else if (esp.getUseDocGeo() == true)
      {
        GeoPojo geo = getEntityGeo(null, f, field);
        if (null != geo) {
          e.setGeotag(geo);
        }
        // (Allow this field to be intrinsically optional)
      }

      // Entity.ontological_type (
      String ontology_type = null;
      if (esp.getOntology_type() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getOntology_type()))
        {
          ontology_type = (String)getValueFromScript(esp.getOntology_type(), field, index);
        }
        else
        {
          ontology_type = getFormattedTextFromField(esp.getOntology_type(), field);
        }
        // Allow this field to be intrinsically optional
      }
      // If ontological_type == null, go fetch it from the internal lookup
      if (ontology_type == null) {
        e.setOntology_type(GeoOntologyMapping.mapEntityToOntology(type));
      }
      else if ('p' == GeoOntologyMapping.encodeOntologyCode(ontology_type) && !ontology_type.equals("point")) {
        // In this case we don't recognize the ontology type so we'll overwrite it
        e.setOntology_type(GeoOntologyMapping.mapEntityToOntology(type));       
      }
      e.setOntology_type(ontology_type);     
           
      // Add the index and geotag to geomap to get used by associations with matching indexes
      if (e.getGeotag() != null)
      {
        _geoMap.put(e.getIndex(), e.getGeotag());
      }
      _entityMap.add(e.getIndex());
     
      return e;
    }
    catch (Exception ex)
    {
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.EntityPojo

      if ((null != filter) && (null != doc.getEntities()) &&
          ((null != filter.entityFilter)||(null != filter.entityGeoFilter)))
      {
        Iterator<EntityPojo> it = doc.getEntities().iterator();
        while (it.hasNext()) {
          EntityPojo ent = it.next();
          Pattern whichRegex = null;
          String whichPattern = null;
          if (null != ent.getGeotag() && (null != filter.entityGeoFilterRegex)) {
            whichRegex = filter.entityGeoFilterRegex;
            whichPattern = filter.entityGeoFilter;
          }
          else {
            whichRegex = filter.entityFilterRegex;
            whichPattern = filter.entityFilter;               
          } // (end which regex to pick)
          if (null != whichRegex) {
            if (whichPattern.startsWith("-")) {
              if (whichRegex.matcher(ent.getIndex()).find()) {
                it.remove();
                continue;
              }
            }
            else if (!whichRegex.matcher(ent.getIndex()).find()) {
              it.remove();
              continue;
            }         
          } // (end if regex exists)
        }//TESTED positive and negative geo and normal entities
View Full Code Here

Examples of com.ikanow.infinit.e.data_model.store.document.EntityPojo

    DocumentPojoIndexMap docMap = new DocumentPojoIndexMap();
    System.out.println("DOC_INDEX=" + IndexManager.mapToIndex(doc, docMap));
   
    ////////////////////////////////////////////////
    // Check use of enums in Entity pojo works
    EntityPojo testEnt = new EntityPojo();
    testEnt.setDimension(EntityPojo.Dimension.Where);
    System.out.println("ENT1=" + new GsonBuilder().setPrettyPrinting().create().toJson(testEnt));   
    System.out.println("DIM=" + testEnt.getDimension());
    BasicDBObject testEntDb = new BasicDBObject("dimension", "Who");
    testEnt = new Gson().fromJson(testEntDb.toString(), EntityPojo.class);
    System.out.println("ENT2=" + new GsonBuilder().setPrettyPrinting().create().toJson(testEnt));
    try {
      testEntDb = new BasicDBObject("dimension", "what");
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.