/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California. For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/
package edu.isi.karma.kr2rml;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import edu.isi.karma.controller.command.selection.SuperSelection;
import edu.isi.karma.kr2rml.ErrorReport.Priority;
import edu.isi.karma.kr2rml.mapping.KR2RMLMapping;
import edu.isi.karma.kr2rml.mapping.KR2RMLMappingColumnNameHNodeTranslator;
import edu.isi.karma.kr2rml.planning.DFSTriplesMapGraphDAGifier;
import edu.isi.karma.kr2rml.planning.RootStrategy;
import edu.isi.karma.kr2rml.planning.SteinerTreeRootStrategy;
import edu.isi.karma.kr2rml.planning.TriplesMap;
import edu.isi.karma.kr2rml.planning.TriplesMapGraph;
import edu.isi.karma.kr2rml.planning.TriplesMapPlan;
import edu.isi.karma.kr2rml.planning.TriplesMapPlanExecutor;
import edu.isi.karma.kr2rml.planning.TriplesMapPlanGenerator;
import edu.isi.karma.kr2rml.planning.TriplesMapWorkerPlan;
import edu.isi.karma.kr2rml.planning.WorksheetDepthRootStrategy;
import edu.isi.karma.kr2rml.writer.AvroKR2RMLRDFWriter;
import edu.isi.karma.kr2rml.writer.KR2RMLRDFWriter;
import edu.isi.karma.kr2rml.writer.N3KR2RMLRDFWriter;
import edu.isi.karma.kr2rml.writer.SFKR2RMLRDFWriter;
import edu.isi.karma.modeling.Namespaces;
import edu.isi.karma.modeling.Uris;
import edu.isi.karma.modeling.ontology.OntologyManager;
import edu.isi.karma.rep.HNode;
import edu.isi.karma.rep.RepFactory;
import edu.isi.karma.rep.Row;
import edu.isi.karma.rep.Worksheet;
public class KR2RMLWorksheetRDFGenerator {
protected RepFactory factory;
protected Worksheet worksheet;
protected String outputFileName;
protected OntologyManager ontMgr;
protected ErrorReport errorReport;
protected boolean addColumnContextInformation;
protected KR2RMLMapping kr2rmlMapping;
protected KR2RMLMappingColumnNameHNodeTranslator translator;
protected ConcurrentHashMap<String, String> hNodeToContextUriMap;
protected List<KR2RMLRDFWriter> outWriters;
protected List<String> tripleMapToKill = new ArrayList<String>();
protected List<String> tripleMapToStop = new ArrayList<String>();
protected List<String> POMToKill = new ArrayList<String>();
private Logger logger = LoggerFactory.getLogger(KR2RMLWorksheetRDFGenerator.class);
private URIFormatter uriFormatter;
private RootStrategy strategy;
private SuperSelection selection;
public KR2RMLWorksheetRDFGenerator(Worksheet worksheet, RepFactory factory,
OntologyManager ontMgr, String outputFileName, boolean addColumnContextInformation,
KR2RMLMapping kr2rmlMapping, ErrorReport errorReport, SuperSelection sel) throws UnsupportedEncodingException, FileNotFoundException {
initializeMemberVariables(worksheet, factory, ontMgr, outputFileName,
addColumnContextInformation, kr2rmlMapping, errorReport);
File f = new File(this.outputFileName);
File parentDir = f.getParentFile();
parentDir.mkdirs();
BufferedWriter bw = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(f),"UTF-8"));
outWriters.add(new N3KR2RMLRDFWriter(uriFormatter, new PrintWriter (bw)));
this.selection = sel;
}
public KR2RMLWorksheetRDFGenerator(Worksheet worksheet, RepFactory factory,
OntologyManager ontMgr, KR2RMLRDFWriter writer, boolean addColumnContextInformation,RootStrategy strategy,
KR2RMLMapping kr2rmlMapping, ErrorReport errorReport, SuperSelection sel) {
initializeMemberVariables(worksheet, factory, ontMgr, outputFileName,
addColumnContextInformation, kr2rmlMapping, errorReport);
this.outWriters.add(writer);
this.strategy = strategy;
this.selection = sel;
}
public KR2RMLWorksheetRDFGenerator(Worksheet worksheet, RepFactory factory,
OntologyManager ontMgr, List<KR2RMLRDFWriter> writers, boolean addColumnContextInformation,
KR2RMLMapping kr2rmlMapping, ErrorReport errorReport, SuperSelection sel) {
initializeMemberVariables(worksheet, factory, ontMgr, outputFileName,
addColumnContextInformation, kr2rmlMapping, errorReport);
this.outWriters.addAll(writers);
this.selection = sel;
}
public KR2RMLWorksheetRDFGenerator(Worksheet worksheet, RepFactory factory,
OntologyManager ontMgr, List<KR2RMLRDFWriter> writers, boolean addColumnContextInformation,
RootStrategy strategy, List<String> tripleMapToKill, List<String> tripleMapToStop,
List<String> POMToKill,
KR2RMLMapping kr2rmlMapping, ErrorReport errorReport, SuperSelection sel) {
initializeMemberVariables(worksheet, factory, ontMgr, outputFileName,
addColumnContextInformation, kr2rmlMapping, errorReport);
this.strategy = strategy;
this.tripleMapToKill = tripleMapToKill;
this.tripleMapToStop = tripleMapToStop;
this.POMToKill = POMToKill;
this.outWriters.addAll(writers);
this.selection = sel;
}
public KR2RMLWorksheetRDFGenerator(Worksheet worksheet, RepFactory factory,
OntologyManager ontMgr, PrintWriter writer, KR2RMLMapping kr2rmlMapping,
ErrorReport errorReport, boolean addColumnContextInformation, SuperSelection sel) {
super();
initializeMemberVariables(worksheet, factory, ontMgr, outputFileName,
addColumnContextInformation, kr2rmlMapping, errorReport);
this.outWriters.add(new N3KR2RMLRDFWriter(uriFormatter, writer));
this.selection = sel;
}
private void initializeMemberVariables(Worksheet worksheet,
RepFactory factory, OntologyManager ontMgr, String outputFileName,
boolean addColumnContextInformation, KR2RMLMapping kr2rmlMapping,
ErrorReport errorReport) {
this.ontMgr = ontMgr;
this.kr2rmlMapping = kr2rmlMapping;
this.factory = factory;
this.worksheet = worksheet;
this.outputFileName = outputFileName;
this.errorReport = errorReport;
this.uriFormatter = new URIFormatter(ontMgr, errorReport);
this.hNodeToContextUriMap = new ConcurrentHashMap<String, String>();
this.addColumnContextInformation = addColumnContextInformation;
this.translator = new KR2RMLMappingColumnNameHNodeTranslator(factory, worksheet);
this.outWriters = new LinkedList<KR2RMLRDFWriter>();
}
@SuppressWarnings({ "rawtypes", "unchecked" })
public void generateRDF(boolean closeWriterAfterGeneration) throws IOException {
try {
// RDF Generation starts at the top level rows
ArrayList<Row> rows = this.worksheet.getDataTable().getRows(0,
this.worksheet.getDataTable().getNumRows(), selection);
Map<TriplesMapGraph, List<String>> graphTriplesMapsProcessingOrder = new HashMap<TriplesMapGraph, List<String>>();
for(TriplesMapGraph graph : kr2rmlMapping.getAuxInfo().getTriplesMapGraph().getGraphs())
{
TriplesMapGraph copyGraph = graph.copyGraph();
if(null == strategy) {
strategy = new SteinerTreeRootStrategy(new WorksheetDepthRootStrategy());
}
copyGraph.killTriplesMap(tripleMapToKill, strategy);
copyGraph.stopTriplesMap(tripleMapToStop, strategy);
copyGraph.killPredicateObjectMap(POMToKill, strategy);
try{
DFSTriplesMapGraphDAGifier dagifier = new DFSTriplesMapGraphDAGifier();
List<String> triplesMapsProcessingOrder = new LinkedList<String>();
triplesMapsProcessingOrder = dagifier.dagify(copyGraph, strategy);
graphTriplesMapsProcessingOrder.put(copyGraph, triplesMapsProcessingOrder);
}catch (Exception e)
{
logger.error("Unable to find DAG for RDF Generation!", e);
throw new Exception("Unable to find DAG for RDF Generation!", e);
}
}
for (KR2RMLRDFWriter writer : outWriters) {
if (writer instanceof SFKR2RMLRDFWriter) {
SFKR2RMLRDFWriter jsonWriter = (SFKR2RMLRDFWriter)writer;
jsonWriter.addPrefixes(kr2rmlMapping.getPrefixes());
for(Entry<TriplesMapGraph, List<String>> entry : graphTriplesMapsProcessingOrder.entrySet())
{
List<String> triplesMapIds = entry.getValue();
jsonWriter.addRootTriplesMapId(triplesMapIds.get(triplesMapIds.size()-1));
}
if(jsonWriter instanceof AvroKR2RMLRDFWriter)
{
AvroKR2RMLRDFWriter avroWriter = (AvroKR2RMLRDFWriter) jsonWriter;
avroWriter.setProcessingOrder(graphTriplesMapsProcessingOrder);
}
}
}
int i=1;
TriplesMapPlanExecutor e = new TriplesMapPlanExecutor();
Map<TriplesMap, TriplesMapWorkerPlan> triplesMapToWorkerPlan = new HashMap<TriplesMap, TriplesMapWorkerPlan>() ;
for(TriplesMap triplesMap : kr2rmlMapping.getTriplesMapList())
{
try{
TriplesMapWorkerPlan workerPlan = new TriplesMapWorkerPlan(factory, triplesMap, kr2rmlMapping, uriFormatter, translator, addColumnContextInformation, hNodeToContextUriMap, selection);
triplesMapToWorkerPlan.put(triplesMap, workerPlan);
}
catch (Exception ex)
{
logger.error("unable to generate working plan for " + triplesMap.getId(), ex.getMessage());
}
}
for (Row row:rows) {
for(Entry<TriplesMapGraph, List<String>> entry : graphTriplesMapsProcessingOrder.entrySet())
{
TriplesMapPlanGenerator g = new TriplesMapPlanGenerator(triplesMapToWorkerPlan, row, outWriters);
TriplesMapPlan plan = g.generatePlan(entry.getKey(), entry.getValue());
errorReport.combine(e.execute(plan));
}
for(KR2RMLRDFWriter outWriter : outWriters)
{
outWriter.finishRow();
}
if (i++%2000 == 0)
logger.info("Done processing " + i + " rows");
}
e.shutdown(errorReport);
// Generate column provenance information if required
if (addColumnContextInformation) {
generateColumnProvenanceInformation();
}
} catch (Exception e)
{
logger.error("Unable to generate RDF: ", e);
errorReport.addReportMessage(new ReportMessage("General RDF Generation Error", e.getMessage(), Priority.high));
throw new IOException("Unable to generate RDF: " +e.getMessage());
}
finally {
if (closeWriterAfterGeneration) {
for(KR2RMLRDFWriter outWriter : outWriters)
{
outWriter.flush();
outWriter.close();
}
}
}
// An attempt to prevent an occasional error that occurs on Windows platform
// The requested operation cannot be performed on a file with a user-mapped section open
//System.gc();
}
private void generateColumnProvenanceInformation() {
for (String hNodeId:hNodeToContextUriMap.keySet()) {
getColumnContextTriples(hNodeId);
// Generate wasDerivedFrom property if required
HNode hNode = factory.getHNode(hNodeId);
if (hNode.isDerivedFromAnotherColumn()) {
HNode originalHNode = factory.getHNode(hNode.getOriginalColumnHNodeId());
if (originalHNode != null) {
getColumnContextTriples(originalHNode.getId());
for(KR2RMLRDFWriter outWriter : outWriters)
{
outWriter.outputTripleWithURIObject(
hNodeToContextUriMap.get(hNodeId), Uris.PROV_WAS_DERIVED_FROM_URI,
getColumnContextUri(originalHNode.getId()));
}
}
}
}
}
protected String getColumnContextUri (String hNodeId) {
if (hNodeToContextUriMap.containsKey(hNodeId))
return hNodeToContextUriMap.get(hNodeId);
else {
String randomId = UUID.randomUUID().toString();
String uri = Namespaces.KARMA_DEV + randomId + "_" + hNodeId;
hNodeToContextUriMap.put(hNodeId, uri);
return uri;
}
}
private void getColumnContextTriples(String hNodeId) {
String colUri = getColumnContextUri(hNodeId);
for(KR2RMLRDFWriter outWriter : outWriters)
{
// Generate the type
outWriter.outputTripleWithURIObject("<" + colUri + ">", Uris.RDF_TYPE_URI,
"<" + Uris.PROV_ENTITY_URI + ">");
// Generate the label
HNode hNode = factory.getHNode(hNodeId);
outWriter.outputTripleWithLiteralObject("<" + colUri + ">", Uris.RDFS_LABEL_URI,
hNode.getColumnName(), "");
}
}
}