Package edu.mit.simile.backstage.util

Source Code of edu.mit.simile.backstage.util.DataLoadingUtilities$ModelReadFromFileException

package edu.mit.simile.backstage.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.zip.GZIPInputStream;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.openrdf.model.BNode;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.rio.ParseErrorListener;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.ntriples.NTriplesParser;
import org.openrdf.rio.rdfxml.RDFXMLParser;
import org.openrdf.rio.turtle.TurtleParser;
import org.openrdf.sail.Sail;
import org.openrdf.sail.SailConnection;
import org.openrdf.sail.SailException;
import org.openrdf.sail.memory.MemoryStore;
import org.openrdf.sail.nativerdf.NativeStore;

import edu.mit.simile.babel.BabelReader;
import edu.mit.simile.babel.exhibit.ExhibitJsonReader;

public class DataLoadingUtilities {
    /** Logger shared by the loaders and nested helpers of this class (category "backstage.data-loading"). */
    protected static Logger _logger = LoggerFactory.getLogger("backstage.data-loading");
   
    /** Leading part of every URI that {@code BNodeConverterStatementHandler.addBNode} mints in place of a blank node. */
    final static public String s_bnodePrefix = "urn:bnode:";

    static public class RepoSailTuple {
        public RepoSailTuple(Repository r, Sail s) {
            repository = r;
            sail = s;
        }

        public Repository repository;
        public Sail sail;
    }
   
    static public RepoSailTuple createMemoryRepository(File dir) {
        try {
            Repository r = null;
            Sail s = null;
            if (dir != null) {
                s = new MemoryStore(dir);
                r = new SailRepository(s);
            } else {
                s = new MemoryStore();
                r = new SailRepository(new MemoryStore());
            }
            r.initialize();

            return new RepoSailTuple(r,s);
        } catch (Exception e) {
            _logger.error("Exception caught while creating Sesame in-memory repository", e);
            return null;
        }
    }

    static public RepoSailTuple createNativeRepository(File dir) {
        try {
            Sail sail = new NativeStore();
            sail.setDataDir(dir);
            ((NativeStore) sail).setTripleIndexes("spoc,posc,opsc");
            Repository r = new SailRepository(sail);
            r.initialize();
            return new RepoSailTuple(r,sail);
        } catch (Exception e) {
            _logger.error("Exception caught while creating Sesame native repository", e);
            return null;
        }
    }
   
    static public String fileToModelLang(File file) {
        return filenameToLang(file.getName());
    }

    static public String urlToModelLang(URL url, String contentType) {
        return urlToModelLang(url.getPath(), contentType);
    }

    static public String urlToModelLang(String url, String contentType) {
        String lang = null;

        if (contentType != null) {
            lang = contentTypeToLang(contentType);
        }
        if (lang == null) {
            lang = filenameToLang(url);
        }

        if (_logger.isDebugEnabled()) _logger.debug(url + " -> " + lang);
        return lang;
    }

    static public String contentTypeToLang(String contentType) {
        String lang = null;
        if ("application/rss+xml".equals(contentType) || "application/atom+xml".equals(contentType)) {
            lang = "RSS";
        } else if ("application/rdf+xml".equals(contentType) || "text/xml".equals(contentType)) {
            lang = "RDFXML";
        } else if ("application/n3".equals(contentType) || "text/rdf+n3".equals(contentType)
                || "application/turtle".equals(contentType) || "application/x-turtle".equals(contentType)) {
            lang = "N3";
        } else if ("application/json".equals(contentType)) {
            lang = "Exhibit/JSON";
        }
        if (_logger.isDebugEnabled()) _logger.debug(contentType + " -> " + lang);
        return lang;
    }
   
    static public String filenameToLang(String filename) {
        String contentType = URLConnection.guessContentTypeFromName(filename);
        String lang = null;

        if (contentType != null) {
            lang = contentTypeToLang(contentType);
        }

        if (lang == null) {
            if (filename.endsWith(".gz")) {
                filename = filename.substring(0, filename.length() - ".gz".length());
            }
            if (filename.endsWith(".n3")) {
                lang = "N3";
            } else if (filename.endsWith(".turtle")) {
                lang = "TURTLE";
            } else if (filename.endsWith(".ntriples")) {
                lang = "NTRIPLES";
            } else if (filename.endsWith(".rss")) {
                lang = "RSS";
            } else if (filename.endsWith(".rdf")
                    || filename.endsWith(".rdfs")
                    || filename.endsWith(".xml")
                    || filename.endsWith(".owl")) {
                lang = "RDFXML";
            } else if (filename.endsWith(".json")
                    || filename.endsWith(".js")) {
              lang = "Exhibit/JSON";
            }
        }

        return lang;
    }

    static public RDFFormat langToFormat(String lang) {
        return RDFFormat.valueOf(lang);
    }

    static public String uriToFilename(String uri) {
        return uri.replace(':', '_').replace('/', '_');
    }
   
    static public void loadDataFromDir(File dir, Sail sail, boolean forgiving) throws Exception {
        if (!dir.exists()) {
            throw new FileNotFoundException("Cannot load data from " + dir.getAbsolutePath());
        }
       
        _logger.info("Loading data from dir " + dir.getAbsolutePath());

        File[] files = dir.listFiles();

        for (int i = 0; i < files.length; i++) {
            File file = files[i];

            if (!file.isHidden()) {
                if (file.isDirectory()) {
                    loadDataFromDir(file, sail, forgiving);
                } else {
                    if (forgiving) {
                        try {
                            loadDataFromFile(file, sail);
                        } catch (Exception e) {
                            _logger.warn("Failed to load data from " + file.getCanonicalPath(), e);
                        }
                    } else {
                        loadDataFromFile(file, sail);
                    }
                }
            }
        }
    }

    static public InputStream getStreamForFile(File file) throws Exception {
        InputStream stream = new FileInputStream(file);
        String name = file.getName();
        if (name.endsWith(".gz")) {
            stream = new GZIPInputStream(stream);
        }
        return stream;
    }
   
    static public void loadDataFromFile(File file, Sail sail) throws Exception {
        String lang = fileToModelLang(file);
        if (lang != null) {
          _logger.info("Loading data from file " + file.getAbsolutePath());
         
            InputStream fis = getStreamForFile(file);
            try {
                loadDataFromStream(fis, file.toURI().toURL().toExternalForm(), lang, sail);
            } catch (Exception e) {
                throw new RuntimeException("Error loading data from file: " + file + " " + e.getMessage());
            } finally {
                fis.close();
            }
        } else {
            throw new ModelReadFromFileException("Unknown data format in " + file.getAbsolutePath());
        }
    }

    static public void loadDataFromConnection(URLConnection conn, URL url, String lang, String contentType, Sail sail) throws Exception {
        if (lang != null) {
            InputStream stream = conn.getInputStream();
            try {
                loadDataFromStream(stream, url.toExternalForm(), lang, sail);
            } catch (Exception e) {
                throw new RuntimeException("Error loading data from URL: " + url + " " + e.getMessage());
            } finally {
                stream.close();
            }
        } else {
            throw new ModelReadFromFileException("Unknown data format in " + url.toExternalForm());
        }
    }

    static public void loadDataFromURL(URL url, String contentType, Sail sail) throws Exception {
        String lang = urlToModelLang(url, contentType);
        if (lang != null) {
            URLConnection conn = url.openConnection();
           
            //setRequestHeaders(conn, LongwellUtilities.getLabel() + "/" + LongwellUtilities.getVersion());
           
            conn.connect();
            InputStream stream = conn.getInputStream();
            try {
                loadDataFromStream(stream, url.toExternalForm(), lang, sail);
            } catch (Exception e) {
                throw new RuntimeException("Error loading data from URL: " + url + " " + e.getMessage());
            } finally {
                stream.close();
            }
        } else {
            throw new ModelReadFromFileException("Unknown data format in " + url.toExternalForm());
        }
    }

    static public void setRequestHeaders(URLConnection conn, String ua) {
        conn.setRequestProperty("User-Agent", ua);
        conn.setRequestProperty("Accept", "application/rdf+xml, text/rdf+n3");
    }

    static public void loadDataFromStream(InputStream stream, String sourceURL, String lang, Sail sail) throws Exception {
        RepoSailTuple rs = createMemoryRepository(null);
        Repository r = rs.repository;

        lang = lang.toLowerCase();
        if ("exhibit/json".equals(lang)) {
            Properties properties = new Properties();
           
            BabelReader reader = new ExhibitJsonReader();
            try {
                if (reader.takesReader()) {
                    InputStreamReader isr = new InputStreamReader(stream);
                    reader.read(isr, sail, properties, Locale.getDefault());
                } else {
                     reader.read(stream, sail, properties, Locale.getDefault());
                }
            } finally {
                stream.close();
            }
        } else {
            RDFParser parser = null;
            if ("rdfxml".equals(lang)) {
                parser = new RDFXMLParser(r.getValueFactory());
            } else if ("n3".equals(lang) || "turtle".equals(lang)) {
                parser = new TurtleParser(r.getValueFactory());
            } else if ("ntriples".equals(lang)) {
                parser = new NTriplesParser(r.getValueFactory());
            }
           
            try {
              SailConnection c = null;
              try {
                  c = sail.getConnection();
                  BNodeConverterStatementHandler handler = new BNodeConverterStatementHandler(c);
 
                  parser.setRDFHandler(handler);
                  parser.setParseErrorListener(new LoggingParseErrorListener(sourceURL));
                  parser.setVerifyData(false);
                  parser.setStopAtFirstError(false);
 
                  parser.parse(stream, sourceURL);
                 
                  c.commit();
 
                  _logger.info("Read " + handler.m_count + " statements from '" + sourceURL + "'");
              } catch (RepositoryException e) {
                  if (c != null) c.rollback();
              } finally {
                  if (c != null) c.close();
              }
          } catch (Exception e) {
              throw new ModelReadFromFileException("Failed to read data from '" + sourceURL + "'", e);
          } finally {
              stream.close();
          }
        }
    }
   
    static class ModelReadFromFileException extends IOException {
        private static final long serialVersionUID = -5802084055919147883L;

        ModelReadFromFileException(String s) {
            super(s);
        }

        ModelReadFromFileException(String s, Throwable e) {
            super(s);
            initCause(e);
        }
    }

    static class BNodeConverterStatementHandler implements RDFHandler {

        SailConnection m_connection;
       
        long m_count;

        URI m_uri = null;

        Map<String,URI> m_bnodeIDToURI = new HashMap<String,URI>();

        BNodeConverterStatementHandler(SailConnection conn) throws RepositoryException {
            m_connection = conn;
        }

        public void handleStatement(Statement st, Resource context) throws RDFHandlerException {
            Resource s = st.getSubject();
            URI p = st.getPredicate();
            Value o = st.getObject();
            if (s instanceof BNode) {
                String sid = ((BNode) s).getID();
                s = (URI) m_bnodeIDToURI.get(sid);
                if (s == null) {
                    s = addBNode(sid);
                }
            } else {
                m_uri = (URI) s;
            }

            if (o instanceof BNode) {
                String oid = ((BNode) o).getID();
                o = (URI) m_bnodeIDToURI.get(oid);
                if (o == null) {
                    o = addBNode(oid);
                }
            }

            try {
                m_connection.addStatement(s, p, o);
                m_count++;
            } catch (SailException e) {
                _logger.error("Exception caught while handling statement", e);
      }
        }

        URI addBNode(String bnode) {
            URI uri = new URIImpl(s_bnodePrefix
                    + (m_uri != null ? m_uri.toString() + ":" : "") + System.currentTimeMillis() + ":" + bnode);

            m_bnodeIDToURI.put(bnode, uri);

            return uri;
        }

        public void startRDF() throws RDFHandlerException {
        }

        public void endRDF() throws RDFHandlerException {
            try {
                m_connection.commit();
            } catch (SailException e) {
                try {
                    m_connection.rollback();
                } catch (Exception ee) {
                    // ignore
                }
                throw new RDFHandlerException(e);
            }
        }

        public void handleNamespace(String arg0, String arg1) throws RDFHandlerException {
            //
        }

        public void handleComment(String arg0) throws RDFHandlerException {
            //
        }

        public void handleStatement(Statement arg0) throws RDFHandlerException {
            handleStatement(arg0, null);
        }
    }
       
    static class LoggingParseErrorListener implements ParseErrorListener {
        String m_source;

        LoggingParseErrorListener(File file) {
            m_source = file.getAbsolutePath();
        }

        LoggingParseErrorListener(URL url) {
            m_source = url.toExternalForm();
        }

        LoggingParseErrorListener(String source) {
            m_source = source;
        }

        public void warning(String msg, int line, int column) {
            _logger.warn("Warning: " + msg + " at " + m_source + " [" + line + "," + column + "]");
        }

        public void error(String msg, int line, int column) {
            _logger.error("Error: " + msg + " at " + m_source + " [" + line + "," + column + "]");
        }

        public void fatalError(String msg, int line, int column) {
            _logger.error("Fatal error: " + msg + " at " + m_source + " [" + line + "," + column + "]");
        }
    }
}
TOP

Related Classes of edu.mit.simile.backstage.util.DataLoadingUtilities$ModelReadFromFileException

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.