// Package: com.clarkparsia.empire.codegen
//
// Source code of com.clarkparsia.empire.codegen.BeanGenerator (and its nested class StatementToSubject)

/*
* Copyright (c) 2009-2012 Clark & Parsia, LLC. <http://www.clarkparsia.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.clarkparsia.empire.codegen;

import com.complexible.common.collect.Iterables2;
import com.complexible.common.collect.Iterators2;
import com.complexible.common.openrdf.model.Statements;
import com.complexible.common.openrdf.repository.Repositories;
import com.google.common.collect.Iterables;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.Literal;
import org.openrdf.model.BNode;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.vocabulary.RDF;

import org.openrdf.model.impl.ValueFactoryImpl;

import org.openrdf.query.QueryLanguage;
import org.openrdf.repository.Repository;
import org.openrdf.rio.RDFFormat;

import org.openrdf.query.BindingSet;
import org.openrdf.query.TupleQueryResult;

import com.complexible.common.openrdf.util.AdunaIterations;

import com.complexible.common.collect.MultiIterator;

import com.complexible.common.net.NetUtils;
import com.complexible.common.base.Functions2;
import com.google.common.base.Predicate;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.collect.Collections2;
import com.google.common.collect.Sets;
import com.google.common.collect.Iterators;
import com.google.common.io.Files;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.Map;
import java.util.List;
import java.util.Arrays;
import java.util.HashSet;
import java.util.HashMap;

import java.io.File;
import java.io.IOException;

import java.net.URL;

/**
* <p>Generate a set of Java beans which are compatible with Empire from a given RDF schema, OWL ontology, or blob
* of RDF data.  The generated source code will map to the domain represented in the RDF.</p>
*
* @author  Michael Grove
* @since  0.5.1
* @version  0.7.3
*/
public final class BeanGenerator {
  /**
   * The logger
   */
  private static final Logger LOGGER = LoggerFactory.getLogger(BeanGenerator.class);

  /**
   * String URI constant for the owl:Thing conccept
   */
  private static final URI OWL_THING = ValueFactoryImpl.getInstance().createURI(OWL.NAMESPACE + "Thing");

  /**
   * The list of xsd datatypes which map to Integer
   */
  private static final List<URI> integerTypes = Arrays.asList(XMLSchema.INT, XMLSchema.INTEGER, XMLSchema.POSITIVE_INTEGER,
                               XMLSchema.NEGATIVE_INTEGER, XMLSchema.NON_NEGATIVE_INTEGER,
                               XMLSchema.NON_POSITIVE_INTEGER, XMLSchema.UNSIGNED_INT);

  /**
   * The list of xsd datatypes which map to Long
   */
  private static final List<URI> longTypes = Arrays.asList(XMLSchema.LONG, XMLSchema.UNSIGNED_LONG);

  /**
   * The list of xsd datatypes which map to Float
   */
  private static final List<URI> floatTypes = Arrays.asList(XMLSchema.FLOAT, XMLSchema.DECIMAL);

  /**
   * The list of xsd datatypes which map to Short
   */
  private static final List<URI> shortTypes = Arrays.asList(XMLSchema.SHORT, XMLSchema.UNSIGNED_SHORT);

  /**
   * The list of xsd datatypes which map to Byte
   */
  private static final List<URI> byteTypes = Arrays.asList(XMLSchema.BYTE, XMLSchema.UNSIGNED_BYTE);

  private static final Map<Resource, String> NAMES = new HashMap<Resource, String>();
  private static final Map<String, Integer> NAMES_TO_COUNT = new HashMap<String, Integer>();

  /**
   * NO instances
   */
  private BeanGenerator() {
  }

  /**
   * Return the Java bean source code that represents the given RDF class
   * @param thePackageName the name of the package the source will be in
   * @param theGraph the repository containing information about the class
   * @param theClass the class that is to be turned into Java source
   * @param theMap the map of classes to the properties in their domain
   * @return a string of the source code of the equivalent Java bean
   * @throws Exception if there is an error while converting
   */
  private static String toSource(final String thePackageName, final Repository theGraph, final Resource theClass, final Map<Resource, Collection<URI>> theMap) throws Exception {
    StringBuffer aSrc = new StringBuffer();

    aSrc.append("package ").append(thePackageName).append(";\n\n");

    aSrc.append("import java.util.*;\n");
    aSrc.append("import javax.persistence.Entity;\n");
    aSrc.append("import com.clarkparsia.empire.SupportsRdfId;\n");
    aSrc.append("import com.clarkparsia.empire.annotation.*;\n\n");

    // TODO: more imports? less?

    Iterable<Resource> aSupers = Iterables2.present(Iterables.transform(AdunaIterations.iterable(Repositories.getStatements(theGraph, theClass, RDFS.SUBCLASSOF, null)),
                                                                        Statements.objectAsResource()));

    aSrc.append("@Entity\n");
    aSrc.append("@RdfsClass(\"").append(theClass).append("\")\n");
    aSrc.append("public interface ").append(className(theClass));

    aSupers = Collections2.filter(Sets.newHashSet(aSupers), new Predicate<Resource>() {
      public boolean apply(final Resource theValue) {
        return theValue != null &&
             !theValue.toString().startsWith(OWL.NAMESPACE)
             && !theValue.toString().startsWith(RDFS.NAMESPACE)
             && !theValue.toString().startsWith(RDF.NAMESPACE);
      }
    });

    boolean aNeedsComma = false;
    aSrc.append(" extends");

    if (aSupers.iterator().hasNext()) {
      for (Resource aSuper : aSupers) {
        if (aNeedsComma) {
          aSrc.append(",");
        }
        else {
          aNeedsComma = true;
        }

        aSrc.append(" ").append(className(aSuper));
      }
    }

    if (aNeedsComma) {
      aSrc.append(",");
    }

    aSrc.append(" SupportsRdfId");

    aSrc.append(" { \n\n");

    Collection<URI> aProps = props(theClass, theMap);

    for (URI aProp : aProps) {
      aSrc.append("@RdfProperty(\"").append(aProp).append("\")\n");
      aSrc.append("public ").append(functionType(theGraph, aProp)).append(" get").append(functionName(aProp)).append("();\n");
      aSrc.append("public void set").append(functionName(aProp)).append("(").append(functionType(theGraph, aProp)).append(" theValue);\n\n");
    }

    aSrc.append("}");

    return aSrc.toString();
  }

  /**
   * Return the type of the function (getter & setter), i.e. the bean property type, for the given rdf:Property
   * @param theRepo the graph of the ontology/data
   * @param theProp the property
   * @return the String representation of the property type
   * @throws Exception if there is an error querying the data
   */
  private static String functionType(final Repository theRepo, final URI theProp) throws Exception {
    String aType;

    Resource aRangeRes = Statements.objectAsResource().apply(AdunaIterations.singleResult(Repositories.getStatements(theRepo, theProp, RDFS.RANGE, null)).orNull()).orNull();

    if (aRangeRes instanceof BNode) {
      // we can't handle bnodes very well, so we're just going to assume Object
      return "Object";
    }

    URI aRange = (URI) aRangeRes;

    if (aRange == null) {
      // no explicit range, try to infer it...
      try {
        TupleQueryResult aResults = Repositories.selectQuery(theRepo, QueryLanguage.SERQL, "select distinct r from {s} <"+theProp+"> {o}, {o} rdf:type {r}");

        if (aResults.hasNext()) {
          URI aTempRange = (URI) aResults.next().getValue("r");
          if (!aResults.hasNext()) {
            aRange = aTempRange;
          }
          else {
            // TODO: leave range as null, the property is used for things of multiple different values.  so here
            // we should try and find the superclass of all the values and use that as the range.
          }
        }

        aResults.close();

        if (aRange == null) {
          // could not get it from type usage, so maybe its a literal and we can guess it from datatype

          aResults = Repositories.selectQuery(theRepo, QueryLanguage.SERQL, "select distinct datatype(o) as dt from {s} <"+theProp+"> {o} where isLiteral(o)");

          if (aResults.hasNext()) {
            URI aTempRange = null;
            while (aTempRange == null && aResults.hasNext()) {
              Literal aLit = (Literal) aResults.next().getValue("o");
              if (aLit != null){
                aTempRange = aLit.getDatatype();
              }
            }
           
            if (!aResults.hasNext()) {
              aRange = aTempRange;
            }
            else {
              // TODO: do something here, literals of multiple types used
            }
          }

          aResults.close();
        }
      }
      catch (Exception e) {
        // don't worry about it
        e.printStackTrace();
      }
    }

    if (XMLSchema.STRING.equals(aRange) || RDFS.LITERAL.equals(aRange)) {
      aType = "String";
    }
    else if (XMLSchema.BOOLEAN.equals(aRange)) {
      aType = "Boolean";
    }
    else if (integerTypes.contains(aRange)) {
      aType = "Integer";
    }
    else if (longTypes.contains(aRange)) {
      aType = "Long";
    }
    else if (XMLSchema.DOUBLE.equals(aRange)) {
      aType = "Double";
    }
    else if (floatTypes.contains(aRange)) {
      aType = "Float";
    }
    else if (shortTypes.contains(aRange)) {
      aType = "Short";
    }
    else if (byteTypes.contains(aRange)) {
      aType = "Byte";
    }
    else if (XMLSchema.ANYURI.equals(aRange)) {
      aType = "java.net.URI";
    }
    else if (XMLSchema.DATE.equals(aRange) || XMLSchema.DATETIME.equals(aRange)) {
      aType = "Date";
    }
    else if (XMLSchema.TIME.equals(aRange)) {
      aType = "Date";
    }
    else if (aRange == null || aRange.equals(OWL_THING)) {
      aType = "Object";
    }
    else {
      aType = className(aRange);
    }

    if (isCollection(theRepo, theProp)) {
      aType = "Collection<? extends " + aType + ">";
    }

    return aType;
  }

  /**
   * Determine whether or not the property's range is a collection.  This will inspect both the ontology, for cardinality
   * restrictions, and when that is not available, it will use the actual structure of the data.
   * @param theRepo the graph of the ontology/data
   * @param theProp the property
   * @return true if the property has a collection as it's value, false if it's just a single valued property
   * @throws Exception if there is an error querying the data
   */
  private static boolean isCollection(final Repository theRepo, final URI theProp) throws Exception {
    // TODO: this is not fool proof.

    String aCardQuery = "select distinct ?card where {\n" +
             "?s rdf:type owl:Restriction.\n" +
             "?s owl:onProperty <"+theProp+">.\n" +
             "?s ?cardProp ?card.\n" +
             "FILTER (?cardProp = owl:cardinality || ?cardProp = owl:minCardinality || ?cardProp = owl:maxCardinality)\n" +
             "}";
      TupleQueryResult aResults = Repositories.selectQuery(theRepo, QueryLanguage.SPARQL ,aCardQuery);
    if (aResults.hasNext()) {
      Literal aCard = (Literal) aResults.next().getValue("card") ;

      try {
        return Integer.parseInt(aCard.getLabel()) > 1;
      }
      catch (NumberFormatException e) {
        LOGGER.error("Unparseable cardinality value for '" + theProp + "' of '" + aCard + "'", e);
      }
    }

    aResults.close();

    try {
      aResults = Repositories.selectQuery(theRepo, QueryLanguage.SPARQL, "select distinct ?s where  { ?s <"+theProp+"> ?o}");
      for (BindingSet aBinding : AdunaIterations.iterable(aResults)) {

        Collection aCollection = Sets.newHashSet(Iterators2.present(Iterators.transform(AdunaIterations.iterator(Repositories.getStatements(theRepo, (Resource) aBinding.getValue("s"), theProp, null)),
                                                                                        Statements.objectOptional())));
        if (aCollection.size() > 1) {
          return true;
        }
      }

      return false;
    }
    finally {
      aResults.close();
    }
  }

  /**
   * Return the name of the function (the bean property) for this rdf:Property
   * @param theProp the rdf:Property
   * @return the name of the Java property/function name
   */
  private static String functionName(final URI theProp) {
    return className(theProp);
  }

  /**
   * Return all the properties for the given resource.  This will return only the properties which are directly
   * associated with the class, not any properties from its parent, or otherwise inferred from the data.
   * @param theRes the resource
   * @param theMap the map of resources to properties
   * @return a collection of the proeprties associated with the class
   */
  private static Collection<URI> props(final Resource theRes, final Map<Resource, Collection<URI>> theMap) {
    Collection<URI> aProps = new HashSet<URI>();

    if (theMap.containsKey(theRes)) {
      aProps.addAll(theMap.get(theRes));
    }

    return aProps;
  }

  /**
   * Given a Resource, return the Java class name for that resource
   * @param theClass the resource
   * @return the name of the Java class
   */
  private static String className(Resource theClass) {
    if (NAMES.containsKey(theClass)) {
      return NAMES.get(theClass);
    }

    String aLabel;

    if (theClass instanceof URI) {
      aLabel = ((URI) theClass).getLocalName();
    }
    else {
      aLabel = theClass.stringValue();
    }

    aLabel = String.valueOf(aLabel.charAt(0)).toUpperCase() + aLabel.substring(1);

    aLabel = aLabel.replaceAll(" ", "");

    if (NAMES_TO_COUNT.containsKey(aLabel)) {
      String aNewLabel = aLabel + NAMES_TO_COUNT.get(aLabel);

      NAMES_TO_COUNT.put(aLabel, NAMES_TO_COUNT.get(aLabel)+1);

      aLabel = aNewLabel;
    }
    else {
      NAMES_TO_COUNT.put(aLabel, 0);
    }

    NAMES.put(theClass, aLabel);

    return aLabel;
  }

  /**
   * Given an ontology/schema, generate Empire compatible Java beans for each class in the ontology.
   * @param thePackageName the name of the packages the source should belong to
   * @param theOntology the location of the ontology to load
   * @param theFormat the RDF format the ontology is in
   * @param theDirToSave where to save the generated source code
   * @throws Exception if there is an error while generating the source
   */
  public static void generateSourceFiles(String thePackageName, URL theOntology, RDFFormat theFormat, File theDirToSave) throws Exception {
    NAMES_TO_COUNT.clear();

    Repository aRepository = Repositories.createInMemoryRepo();

    Repositories.add(aRepository, theOntology.openStream(), theFormat);

    Collection<Resource> aClasses = Sets.newHashSet(Iterators.transform(new MultiIterator<Statement>(AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, RDFS.CLASS)),
                                                     AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, OWL.CLASS))),
                                      new StatementToSubject()));

    aClasses = Collections2.filter(aClasses, new Predicate<Resource>() { public boolean apply(Resource theRes) { return theRes instanceof URI; } });

    Collection<Resource> aIndClasses = Sets.newHashSet(Iterators.transform(AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, null)),
                                                                           Functions.compose(Functions2.<Value, Resource>cast(Resource.class),
                                                                                             new StatementToObject())));

    aClasses.addAll(aIndClasses);

    aClasses = Collections2.filter(aClasses, new Predicate<Resource>() {
      public boolean apply(final Resource theValue) {
        return !theValue.stringValue().startsWith(RDFS.NAMESPACE)
             && !theValue.stringValue().startsWith(RDF.NAMESPACE)
             && !theValue.stringValue().startsWith(OWL.NAMESPACE);
      }
    });

    Map<Resource, Collection<URI>> aMap = new HashMap<Resource, Collection<URI>>();

    for (Resource aClass : aClasses) {
      if (aClass instanceof BNode) { continue; }
      Collection<URI> aProps = Sets.newHashSet(Iterators.transform(AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDFS.DOMAIN, aClass)),
                                                                   Functions.compose(Functions2.<Resource, URI>cast(URI.class),
                                                                                     new StatementToSubject())));

      // infer properties based on usage in actual instance data
      for (BindingSet aBinding : AdunaIterations.iterable(Repositories.selectQuery(aRepository, QueryLanguage.SPARQL, "select distinct ?p where { ?s rdf:type <" + aClass + ">. ?s ?p ?o }"))) {
        aProps.add( (URI) aBinding.getValue("p"));
      }

      // don't include rdf:type as a property
      aProps = Collections2.filter(aProps, new Predicate<URI>() {
        public boolean apply(final URI theValue) {
          return !RDF.TYPE.equals(theValue);
        }
      });

      aMap.put(aClass, aProps);
    }

    if (!theDirToSave.exists()) {
      if (!theDirToSave.mkdirs()) {
        throw new IOException("Could not create output directory");
      }
    }

    for (Resource aClass :  aMap.keySet()) {
      String aSrc = toSource(thePackageName, aRepository, aClass, aMap);

      if (aSrc == null) {
        continue;
      }

      File aFile = new File(theDirToSave, className(aClass) + ".java");

      System.out.println("Writing source to file: " + aFile.getName());

      Files.write(aSrc, aFile, Charsets.UTF_8);
    }
  }

  public static void main(String[] args) throws Exception {
    //aGraph.read(new URL("http://xmlns.com/foaf/spec/index.rdf").openStream());
//    File aOut = new File("/Users/mhgrove/work/GitHub/empire/core/src/com/clarkparsia/empire/codegen/test/");
//
//    generateSourceFiles("com.clarkparsia.empire.codegen.test", new File("test/data/nasa.nt").toURI().toURL(), RDFFormat.NTRIPLES, aOut);

    if (args.length < 4) {
      System.err.println("Must provide four arguments to the program, the package name, ontology URL, rdf format of the ontology (rdf/xml|turtle|ntriples), and the output directory for the source code.\n");
      System.err.println("For example:\n");
      System.err.println("\tBeanGenerator my.package.domain /usr/local/files/myontology.ttl turtle /usr/local/code/src/my/package/domain");

      return;
    }

    URL aURL;

    if (NetUtils.isURL(args[1])) {
      aURL = new URL(args[1]);
    }
    else {
      aURL = new File(args[1]).toURI().toURL();
    }

    generateSourceFiles(args[0], aURL, RDFFormat.valueOf(args[2]), new File(args[3]));
  }

  private static class StatementToObject implements Function<Statement, Value> {
    public Value apply(final Statement theIn) {
      return theIn.getObject();
    }
  }

  private static class StatementToSubject implements Function<Statement, Resource> {
    public Resource apply(final Statement theIn) {
      return theIn.getSubject();
    }
  }
}
// ---------------------------------------------------------------------------
// Listing footer (non-code, retained from the original page):
// Related classes of com.clarkparsia.empire.codegen.BeanGenerator$StatementToSubject
// Copyright (c) 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners.  Java is a trademark
// of Sun Microsystems, Inc., owned by Oracle Inc.  Contact: coftware#gmail.com.
// ---------------------------------------------------------------------------