Package edu.isi.karma.kr2rml.writer

Source Code of edu.isi.karma.kr2rml.writer.KR2RMLBloomFilterManager

package edu.isi.karma.kr2rml.writer;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.isi.karma.kr2rml.mapping.R2RMLMappingIdentifier;
import edu.isi.karma.modeling.Uris;

public class KR2RMLBloomFilterManager {

  private static final Logger LOG = LoggerFactory.getLogger(KR2RMLBloomFilterManager.class);
  private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
  protected ConcurrentHashMap<String, KR2RMLBloomFilter> idToBloomFilter;
  protected R2RMLMappingIdentifier mappingIdentifier;
  public KR2RMLBloomFilterManager(R2RMLMappingIdentifier mappingIdentifier)
  {
    idToBloomFilter = new ConcurrentHashMap<String, KR2RMLBloomFilter>();
    this.mappingIdentifier = mappingIdentifier;
  }
  public KR2RMLBloomFilterManager(JSONObject serializedManager) throws IOException
  {
    idToBloomFilter = new ConcurrentHashMap<String, KR2RMLBloomFilter>();
    String idsConcatenated = serializedManager.getString("ids");
    String[] ids = idsConcatenated.split(",");
    for(String id : ids)
    {
      String base64EncodedBloomFilter = serializedManager.getString(id);
      KR2RMLBloomFilter bf = new KR2RMLBloomFilter();
      bf.populateFromCompressedAndBase64EncodedString(base64EncodedBloomFilter);
      idToBloomFilter.put(id, bf);
    }
    this.mappingIdentifier = new R2RMLMappingIdentifier(serializedManager.getJSONObject("mappingIdentifier"));
  }
 
  public KR2RMLBloomFilter getBloomFilter(String id)
  {
    return idToBloomFilter.get(id);
  }
  public void addUriToBloomFilter(String id, String uri) {
    KR2RMLBloomFilter bf = null;
    if(!idToBloomFilter.containsKey(id))
    {
      idToBloomFilter.putIfAbsent(id, new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize, KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH));
    }
    bf = idToBloomFilter.get(id);
   
    Key k = new Key(uri.getBytes(UTF8_CHARSET));
    bf.add(k);
    return;
  }
 
  public JSONObject toJSON()
  {
    JSONObject filters = new JSONObject();
    StringBuffer ids = new StringBuffer();
    for(Entry<String, KR2RMLBloomFilter> entry : idToBloomFilter.entrySet())
    {
      String key = entry.getKey();
      KR2RMLBloomFilter bf = entry.getValue();
     
      try
      {
        String base64EncodedCompressedSerializedBloomFilter = bf.compressAndBase64Encode();
        filters.put(key, base64EncodedCompressedSerializedBloomFilter);
      }
      catch (IOException e)
      {
        LOG.error("Unable to append bloom filter for id: " +key);
        continue;
      }
      if(ids.length() != 0)
      {
        ids.append(",");
      }
      ids.append(entry.getKey());
     
     
    }
    filters.put("ids", ids.toString());
    filters.put("mappingIdentifier", mappingIdentifier.toJSON());
    return filters;
  }
  public String toRDF()
  {
    StringBuilder builder = new StringBuilder();
    for(Entry<String, KR2RMLBloomFilter> entry : idToBloomFilter.entrySet())
    {
      KR2RMLBloomFilter bf = entry.getValue();
      String key = entry.getKey();
      StringBuilder tripleBuilder = new StringBuilder();
      tripleBuilder.append("<");
      tripleBuilder.append(key);
      tripleBuilder.append("> <");
      tripleBuilder.append(Uris.KM_HAS_BLOOMFILTER);
      tripleBuilder.append("> \"");
      try
      {
      String base64EncodedCompressedSerializedBloomFilter = bf.compressAndBase64Encode();
        tripleBuilder.append(base64EncodedCompressedSerializedBloomFilter);
      }
      catch (IOException e)
      {
        LOG.error("Unable to append bloom filter for id: " + key);
        continue;
      }
      tripleBuilder.append("\" . \n");
      builder.append(tripleBuilder);
    }
    return builder.toString();
  }
 
}
TOP

Related Classes of edu.isi.karma.kr2rml.writer.KR2RMLBloomFilterManager

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.