// Package: com.clarkparsia.empire.codegen
//
// Source code of com.clarkparsia.empire.codegen.BeanGenerator (and its nested class StatementToSubject)

/*
* Copyright (c) 2009-2012 Clark & Parsia, LLC. <http://www.clarkparsia.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.clarkparsia.empire.codegen;

import com.complexible.common.collect.Iterables2;
import com.complexible.common.collect.Iterators2;
import com.complexible.common.openrdf.model.Statements;
import com.complexible.common.openrdf.repository.Repositories;
import com.google.common.collect.Iterables;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.Literal;
import org.openrdf.model.BNode;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.vocabulary.RDF;

import org.openrdf.model.impl.ValueFactoryImpl;

import org.openrdf.query.QueryLanguage;
import org.openrdf.repository.Repository;
import org.openrdf.rio.RDFFormat;

import org.openrdf.query.BindingSet;
import org.openrdf.query.TupleQueryResult;

import com.complexible.common.openrdf.util.AdunaIterations;

import com.complexible.common.collect.MultiIterator;

import com.complexible.common.net.NetUtils;
import com.complexible.common.base.Functions2;
import com.google.common.base.Predicate;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.collect.Collections2;
import com.google.common.collect.Sets;
import com.google.common.collect.Iterators;
import com.google.common.io.Files;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.Map;
import java.util.List;
import java.util.Arrays;
import java.util.HashSet;
import java.util.HashMap;

import java.io.File;
import java.io.IOException;

import java.net.URL;

/**
* <p>Generate a set of Java beans which are compatible with Empire from a given RDF schema, OWL ontology, or blob
* of RDF data.  The generated source code will map to the domain represented in the RDF.</p>
*
* @author  Michael Grove
* @since  0.5.1
* @version  0.7.3
*/
public final class BeanGenerator {
  /**
   * The logger
   */
  private static final Logger LOGGER = LoggerFactory.getLogger(BeanGenerator.class);

  /**
   * String URI constant for the owl:Thing conccept
   */
  private static final URI OWL_THING = ValueFactoryImpl.getInstance().createURI(OWL.NAMESPACE + "Thing");

  /**
   * The list of xsd datatypes which map to Integer
   */
  private static final List<URI> integerTypes = Arrays.asList(XMLSchema.INT, XMLSchema.INTEGER, XMLSchema.POSITIVE_INTEGER,
                               XMLSchema.NEGATIVE_INTEGER, XMLSchema.NON_NEGATIVE_INTEGER,
                               XMLSchema.NON_POSITIVE_INTEGER, XMLSchema.UNSIGNED_INT);

  /**
   * The list of xsd datatypes which map to Long
   */
  private static final List<URI> longTypes = Arrays.asList(XMLSchema.LONG, XMLSchema.UNSIGNED_LONG);

  /**
   * The list of xsd datatypes which map to Float
   */
  private static final List<URI> floatTypes = Arrays.asList(XMLSchema.FLOAT, XMLSchema.DECIMAL);

  /**
   * The list of xsd datatypes which map to Short
   */
  private static final List<URI> shortTypes = Arrays.asList(XMLSchema.SHORT, XMLSchema.UNSIGNED_SHORT);

  /**
   * The list of xsd datatypes which map to Byte
   */
  private static final List<URI> byteTypes = Arrays.asList(XMLSchema.BYTE, XMLSchema.UNSIGNED_BYTE);

  private static final Map<Resource, String> NAMES = new HashMap<Resource, String>();
  private static final Map<String, Integer> NAMES_TO_COUNT = new HashMap<String, Integer>();

  /**
   * NO instances
   */
  private BeanGenerator() {
  }

  /**
   * Return the Java bean source code that represents the given RDF class
   * @param thePackageName the name of the package the source will be in
   * @param theGraph the repository containing information about the class
   * @param theClass the class that is to be turned into Java source
   * @param theMap the map of classes to the properties in their domain
   * @return a string of the source code of the equivalent Java bean
   * @throws Exception if there is an error while converting
   */
  private static String toSource(final String thePackageName, final Repository theGraph, final Resource theClass, final Map<Resource, Collection<URI>> theMap) throws Exception {
    StringBuffer aSrc = new StringBuffer();

    aSrc.append("package ").append(thePackageName).append(";\n\n");

    aSrc.append("import java.util.*;\n");
    aSrc.append("import javax.persistence.Entity;\n");
    aSrc.append("import com.clarkparsia.empire.SupportsRdfId;\n");
    aSrc.append("import com.clarkparsia.empire.annotation.*;\n\n");

    // TODO: more imports? less?

    Iterable<Resource> aSupers = Iterables2.present(Iterables.transform(AdunaIterations.iterable(Repositories.getStatements(theGraph, theClass, RDFS.SUBCLASSOF, null)),
                                                                        Statements.objectAsResource()));

    aSrc.append("@Entity\n");
    aSrc.append("@RdfsClass(\"").append(theClass).append("\")\n");
    aSrc.append("public interface ").append(className(theClass));

    aSupers = Collections2.filter(Sets.newHashSet(aSupers), new Predicate<Resource>() {
      public boolean apply(final Resource theValue) {
        return theValue != null &&
             !theValue.toString().startsWith(OWL.NAMESPACE)
             && !theValue.toString().startsWith(RDFS.NAMESPACE)
             && !theValue.toString().startsWith(RDF.NAMESPACE);
      }
    });

    boolean aNeedsComma = false;
    aSrc.append(" extends");

    if (aSupers.iterator().hasNext()) {
      for (Resource aSuper : aSupers) {
        if (aNeedsComma) {
          aSrc.append(",");
        }
        else {
          aNeedsComma = true;
        }

        aSrc.append(" ").append(className(aSuper));
      }
    }

    if (aNeedsComma) {
      aSrc.append(",");
    }

    aSrc.append(" SupportsRdfId");

    aSrc.append(" { \n\n");

    Collection<URI> aProps = props(theClass, theMap);

    for (URI aProp : aProps) {
      aSrc.append("@RdfProperty(\"").append(aProp).append("\")\n");
      aSrc.append("public ").append(functionType(theGraph, aProp)).append(" get").append(functionName(aProp)).append("();\n");
      aSrc.append("public void set").append(functionName(aProp)).append("(").append(functionType(theGraph, aProp)).append(" theValue);\n\n");
    }

    aSrc.append("}");

    return aSrc.toString();
  }

  /**
   * Return the type of the function (getter & setter), i.e. the bean property type, for the given rdf:Property
   * @param theRepo the graph of the ontology/data
   * @param theProp the property
   * @return the String representation of the property type
   * @throws Exception if there is an error querying the data
   */
  private static String functionType(final Repository theRepo, final URI theProp) throws Exception {
    String aType;

    Resource aRangeRes = Statements.objectAsResource().apply(AdunaIterations.singleResult(Repositories.getStatements(theRepo, theProp, RDFS.RANGE, null)).orNull()).orNull();

    if (aRangeRes instanceof BNode) {
      // we can't handle bnodes very well, so we're just going to assume Object
      return "Object";
    }

    URI aRange = (URI) aRangeRes;

    if (aRange == null) {
      // no explicit range, try to infer it...
      try {
        TupleQueryResult aResults = Repositories.selectQuery(theRepo, QueryLanguage.SERQL, "select distinct r from {s} <"+theProp+"> {o}, {o} rdf:type {r}");

        if (aResults.hasNext()) {
          URI aTempRange = (URI) aResults.next().getValue("r");
          if (!aResults.hasNext()) {
            aRange = aTempRange;
          }
          else {
            // TODO: leave range as null, the property is used for things of multiple different values.  so here
            // we should try and find the superclass of all the values and use that as the range.
          }
        }

        aResults.close();

        if (aRange == null) {
          // could not get it from type usage, so maybe its a literal and we can guess it from datatype

          aResults = Repositories.selectQuery(theRepo, QueryLanguage.SERQL, "select distinct datatype(o) as dt from {s} <"+theProp+"> {o} where isLiteral(o)");

          if (aResults.hasNext()) {
            URI aTempRange = null;
            while (aTempRange == null && aResults.hasNext()) {
              Literal aLit = (Literal) aResults.next().getValue("o");
              if (aLit != null){
                aTempRange = aLit.getDatatype();
              }
            }
           
            if (!aResults.hasNext()) {
              aRange = aTempRange;
            }
            else {
              // TODO: do something here, literals of multiple types used
            }
          }

          aResults.close();
        }
      }
      catch (Exception e) {
        // don't worry about it
        e.printStackTrace();
      }
    }

    if (XMLSchema.STRING.equals(aRange) || RDFS.LITERAL.equals(aRange)) {
      aType = "String";
    }
    else if (XMLSchema.BOOLEAN.equals(aRange)) {
      aType = "Boolean";
    }
    else if (integerTypes.contains(aRange)) {
      aType = "Integer";
    }
    else if (longTypes.contains(aRange)) {
      aType = "Long";
    }
    else if (XMLSchema.DOUBLE.equals(aRange)) {
      aType = "Double";
    }
    else if (floatTypes.contains(aRange)) {
      aType = "Float";
    }
    else if (shortTypes.contains(aRange)) {
      aType = "Short";
    }
    else if (byteTypes.contains(aRange)) {
      aType = "Byte";
    }
    else if (XMLSchema.ANYURI.equals(aRange)) {
      aType = "java.net.URI";
    }
    else if (XMLSchema.DATE.equals(aRange) || XMLSchema.DATETIME.equals(aRange)) {
      aType = "Date";
    }
    else if (XMLSchema.TIME.equals(aRange)) {
      aType = "Date";
    }
    else if (aRange == null || aRange.equals(OWL_THING)) {
      aType = "Object";
    }
    else {
      aType = className(aRange);
    }

    if (isCollection(theRepo, theProp)) {
      aType = "Collection<? extends " + aType + ">";
    }

    return aType;
  }

  /**
   * Determine whether or not the property's range is a collection.  This will inspect both the ontology, for cardinality
   * restrictions, and when that is not available, it will use the actual structure of the data.
   * @param theRepo the graph of the ontology/data
   * @param theProp the property
   * @return true if the property has a collection as it's value, false if it's just a single valued property
   * @throws Exception if there is an error querying the data
   */
  private static boolean isCollection(final Repository theRepo, final URI theProp) throws Exception {
    // TODO: this is not fool proof.

    String aCardQuery = "select distinct ?card where {\n" +
             "?s rdf:type owl:Restriction.\n" +
             "?s owl:onProperty <"+theProp+">.\n" +
             "?s ?cardProp ?card.\n" +
             "FILTER (?cardProp = owl:cardinality || ?cardProp = owl:minCardinality || ?cardProp = owl:maxCardinality)\n" +
             "}";
      TupleQueryResult aResults = Repositories.selectQuery(theRepo, QueryLanguage.SPARQL ,aCardQuery);
    if (aResults.hasNext()) {
      Literal aCard = (Literal) aResults.next().getValue("card") ;

      try {
        return Integer.parseInt(aCard.getLabel()) > 1;
      }
      catch (NumberFormatException e) {
        LOGGER.error("Unparseable cardinality value for '" + theProp + "' of '" + aCard + "'", e);
      }
    }

    aResults.close();

    try {
      aResults = Repositories.selectQuery(theRepo, QueryLanguage.SPARQL, "select distinct ?s where  { ?s <"+theProp+"> ?o}");
      for (BindingSet aBinding : AdunaIterations.iterable(aResults)) {

        Collection aCollection = Sets.newHashSet(Iterators2.present(Iterators.transform(AdunaIterations.iterator(Repositories.getStatements(theRepo, (Resource) aBinding.getValue("s"), theProp, null)),
                                                                                        Statements.objectOptional())));
        if (aCollection.size() > 1) {
          return true;
        }
      }

      return false;
    }
    finally {
      aResults.close();
    }
  }

  /**
   * Return the name of the function (the bean property) for this rdf:Property
   * @param theProp the rdf:Property
   * @return the name of the Java property/function name
   */
  private static String functionName(final URI theProp) {
    return className(theProp);
  }

  /**
   * Return all the properties for the given resource.  This will return only the properties which are directly
   * associated with the class, not any properties from its parent, or otherwise inferred from the data.
   * @param theRes the resource
   * @param theMap the map of resources to properties
   * @return a collection of the proeprties associated with the class
   */
  private static Collection<URI> props(final Resource theRes, final Map<Resource, Collection<URI>> theMap) {
    Collection<URI> aProps = new HashSet<URI>();

    if (theMap.containsKey(theRes)) {
      aProps.addAll(theMap.get(theRes));
    }

    return aProps;
  }

  /**
   * Given a Resource, return the Java class name for that resource
   * @param theClass the resource
   * @return the name of the Java class
   */
  private static String className(Resource theClass) {
    if (NAMES.containsKey(theClass)) {
      return NAMES.get(theClass);
    }

    String aLabel;

    if (theClass instanceof URI) {
      aLabel = ((URI) theClass).getLocalName();
    }
    else {
      aLabel = theClass.stringValue();
    }

    aLabel = String.valueOf(aLabel.charAt(0)).toUpperCase() + aLabel.substring(1);

    aLabel = aLabel.replaceAll(" ", "");

    if (NAMES_TO_COUNT.containsKey(aLabel)) {
      String aNewLabel = aLabel + NAMES_TO_COUNT.get(aLabel);

      NAMES_TO_COUNT.put(aLabel, NAMES_TO_COUNT.get(aLabel)+1);

      aLabel = aNewLabel;
    }
    else {
      NAMES_TO_COUNT.put(aLabel, 0);
    }

    NAMES.put(theClass, aLabel);

    return aLabel;
  }

  /**
   * Given an ontology/schema, generate Empire compatible Java beans for each class in the ontology.
   * @param thePackageName the name of the packages the source should belong to
   * @param theOntology the location of the ontology to load
   * @param theFormat the RDF format the ontology is in
   * @param theDirToSave where to save the generated source code
   * @throws Exception if there is an error while generating the source
   */
  public static void generateSourceFiles(String thePackageName, URL theOntology, RDFFormat theFormat, File theDirToSave) throws Exception {
    NAMES_TO_COUNT.clear();

    Repository aRepository = Repositories.createInMemoryRepo();

    Repositories.add(aRepository, theOntology.openStream(), theFormat);

    Collection<Resource> aClasses = Sets.newHashSet(Iterators.transform(new MultiIterator<Statement>(AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, RDFS.CLASS)),
                                                     AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, OWL.CLASS))),
                                      new StatementToSubject()));

    aClasses = Collections2.filter(aClasses, new Predicate<Resource>() { public boolean apply(Resource theRes) { return theRes instanceof URI; } });

    Collection<Resource> aIndClasses = Sets.newHashSet(Iterators.transform(AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, null)),
                                                                           Functions.compose(Functions2.<Value, Resource>cast(Resource.class),
                                                                                             new StatementToObject())));

    aClasses.addAll(aIndClasses);

    aClasses = Collections2.filter(aClasses, new Predicate<Resource>() {
      public boolean apply(final Resource theValue) {
        return !theValue.stringValue().startsWith(RDFS.NAMESPACE)
             && !theValue.stringValue().startsWith(RDF.NAMESPACE)
             && !theValue.stringValue().startsWith(OWL.NAMESPACE);
      }
    });

    Map<Resource, Collection<URI>> aMap = new HashMap<Resource, Collection<URI>>();

    for (Resource aClass : aClasses) {
      if (aClass instanceof BNode) { continue; }
      Collection<URI> aProps = Sets.newHashSet(Iterators.transform(AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDFS.DOMAIN, aClass)),
                                                                   Functions.compose(Functions2.<Resource, URI>cast(URI.class),
                                                                                     new StatementToSubject())));

      // infer properties based on usage in actual instance data
      for (BindingSet aBinding : AdunaIterations.iterable(Repositories.selectQuery(aRepository, QueryLanguage.SPARQL, "select distinct ?p where { ?s rdf:type <" + aClass + ">. ?s ?p ?o }"))) {
        aProps.add( (URI) aBinding.getValue("p"));
      }

      // don't include rdf:type as a property
      aProps = Collections2.filter(aProps, new Predicate<URI>() {
        public boolean apply(final URI theValue) {
          return !RDF.TYPE.equals(theValue);
        }
      });

      aMap.put(aClass, aProps);
    }

    if (!theDirToSave.exists()) {
      if (!theDirToSave.mkdirs()) {
        throw new IOException("Could not create output directory");
      }
    }

    for (Resource aClass :  aMap.keySet()) {
      String aSrc = toSource(thePackageName, aRepository, aClass, aMap);

      if (aSrc == null) {
        continue;
      }

      File aFile = new File(theDirToSave, className(aClass) + ".java");

      System.out.println("Writing source to file: " + aFile.getName());

      Files.write(aSrc, aFile, Charsets.UTF_8);
    }
  }

  public static void main(String[] args) throws Exception {
    //aGraph.read(new URL("http://xmlns.com/foaf/spec/index.rdf").openStream());
//    File aOut = new File("/Users/mhgrove/work/GitHub/empire/core/src/com/clarkparsia/empire/codegen/test/");
//
//    generateSourceFiles("com.clarkparsia.empire.codegen.test", new File("test/data/nasa.nt").toURI().toURL(), RDFFormat.NTRIPLES, aOut);

    if (args.length < 4) {
      System.err.println("Must provide four arguments to the program, the package name, ontology URL, rdf format of the ontology (rdf/xml|turtle|ntriples), and the output directory for the source code.\n");
      System.err.println("For example:\n");
      System.err.println("\tBeanGenerator my.package.domain /usr/local/files/myontology.ttl turtle /usr/local/code/src/my/package/domain");

      return;
    }

    URL aURL;

    if (NetUtils.isURL(args[1])) {
      aURL = new URL(args[1]);
    }
    else {
      aURL = new File(args[1]).toURI().toURL();
    }

    generateSourceFiles(args[0], aURL, RDFFormat.valueOf(args[2]), new File(args[3]));
  }

  private static class StatementToObject implements Function<Statement, Value> {
    public Value apply(final Statement theIn) {
      return theIn.getObject();
    }
  }

  private static class StatementToSubject implements Function<Statement, Resource> {
    public Resource apply(final Statement theIn) {
      return theIn.getSubject();
    }
  }
}
// ---------------------------------------------------------------------------
// Listing footer (non-code, retained from the original page):
// Related classes of com.clarkparsia.empire.codegen.BeanGenerator$StatementToSubject
// Copyright (c) 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners.  Java is a trademark
// of Sun Microsystems, Inc., owned by Oracle Inc.  Contact: coftware#gmail.com.
// ---------------------------------------------------------------------------