Package uk.ac.osswatch.simal.importData

Source Code of uk.ac.osswatch.simal.importData.PTSWImport

/*
* Copyright 2007 University of Oxford
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.osswatch.simal.importData;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import uk.ac.osswatch.simal.SimalRepositoryFactory;
import uk.ac.osswatch.simal.rdf.ISimalRepository;
import uk.ac.osswatch.simal.rdf.SimalException;
import uk.ac.osswatch.simal.rdf.io.RDFUtils;

public class PTSWImport {
  private static final Logger logger = LoggerFactory
      .getLogger(PTSWImport.class);

  /**
   * Get latest pings as RDF/XML by retrieving the document
   * http://pingthesemanticweb.com/export/, extracting the list of pings and
   * then retrieving the relevant DOAP files.<br/>
   *
   * Note that you must have created an account on the Ping The Semantic Web
   * site for the IP number that is making the request.
   *
   * @throws SimalException
   *
   */
  public Document getLatestPingsAsRDF() throws SimalException {
    URL url;
    try {
      url = new URL("http://pingthesemanticweb.com/export/");
    } catch (MalformedURLException e) {
      throw new SimalException(
          "The PTSW URL is malformed, how can that happen since it is hard coded?",
          e);
    }
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setNamespaceAware(true);
    Document doc = null;

    DocumentBuilder db;
    try {
      db = dbf.newDocumentBuilder();
      doc = db.parse(url.openStream());
    } catch (Exception e) {
      throw new SimalException("Unable to retrive the PTSW export file", e);
    }

    return getPingsAsRDF(doc);
  }

  /**
   * Get a list of pings contained within the supplied XML Export from Ping The
   * Semantic Web.
* @throws SimalException
   */
  public Set<URI> getListOfPings(Document export) throws SimalException {
    String strURL = null;
    HashSet<URI> uris = new HashSet<URI>();
    Element root = export.getDocumentElement();
   
    NodeList errors = root.getElementsByTagName("error");
    if (errors.getLength() > 0) {
      throw new SimalException("Unable to communicate with PTSW: " + errors.item(0).getTextContent());
    }
   
    NodeList pings = root.getElementsByTagName("rdfdocument");
    for (int i = 0; i < pings.getLength(); i++) {
      Element ping = (Element) pings.item(i);
      try {
        strURL = ping.getAttributeNode("url").getValue();
        if (strURL.startsWith("http://doapspace.org")) {
          strURL = strURL + ".rdf";
        }
        uris.add(new URI(strURL));
      } catch (URISyntaxException e) {
        logger.warn("Unable to process URL" + strURL, e);
      }
    }
    return uris;
  }

  /**
   * Get an RDF/XML document containing all the DOAP files referenced in a PTSW
   * export file.
   *
   * @throws ParserConfigurationException
   */
  public Document getPingsAsRDF(Document export) throws SimalException {
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setNamespaceAware(true);
    DocumentBuilder db;
    try {
      db = dbf.newDocumentBuilder();
    } catch (ParserConfigurationException e) {
      throw new SimalException("Unable to create new RDF/XML document", e);
    }
    Document resultDoc = db.newDocument();
    Element root = resultDoc.createElementNS(RDFUtils.RDF_NS, "RDF");
    Iterator<URI> pings = getListOfPings(export).iterator();
    Document projectDoc;
    Element projectRoot;
    URI ping = null;
    while (pings.hasNext()) {
      try {
        ping = pings.next();
        projectDoc = db.parse(ping.toURL().openStream());
        projectRoot = projectDoc.getDocumentElement();
        if (!projectRoot.getLocalName().equals("Project")) {
          NamedNodeMap atts = projectRoot.getAttributes();
          Element docRoot = projectRoot;
          projectRoot = (Element) projectRoot.getElementsByTagNameNS(
              RDFUtils.DOAP_NS, "Project").item(0);
          for (int i = 0; i < atts.getLength(); i++) {
            Attr att = (Attr) atts.item(i);
            i = i - 1;
            if (att.getName().startsWith("xmlns")) {
              docRoot.removeAttributeNode(att);
              projectRoot.setAttributeNode(att);
            }
          }
        }
        Node importedProjectNode = resultDoc.importNode(projectRoot, true);
        Element seeAlso = resultDoc.createElementNS(RDFUtils.RDF_NS,
            "rdf:seeAlso");
        seeAlso.setAttributeNS(RDFUtils.RDF_NS, "rdf:resource", ping.toURL()
            .toExternalForm());
        importedProjectNode.appendChild(seeAlso);
        root.appendChild(importedProjectNode);
      } catch (IOException e) {
        logger.warn("Ignoring illegal XML document loaded from " + ping, e);
      } catch (SAXException e) {
        logger.warn("Ignoring illegal XML document loaded from " + ping, e);
      }
    }
    resultDoc.appendChild(root);
    return resultDoc;
  }

  /**
   * Import all the latest Doap documents.
* @throws SimalException
* @throws IOException
   */
  public void importLatestDOAP() throws SimalException, IOException
        ISimalRepository repo;
      int preProjectCount;
      repo = SimalRepositoryFactory.getInstance();
      preProjectCount = repo.getAllProjects().size();
     
      Document pings = getLatestPingsAsRDF();
     
          OutputFormat format = new OutputFormat(pings);
      StringWriter writer = new StringWriter();
      XMLSerializer serial = new XMLSerializer(writer, format);
      serial.serialize(pings);
      logger.info("Updated DOAP documents:\n");
      logger.info(writer.toString());
   
      File tmpFile = new File(System.getProperty("java.io.tmpdir")
          + File.separator + "PTSWExport.xml");
      FileWriter fw = null;
      try {
        fw = new FileWriter(tmpFile);
        fw.write(writer.toString());
      } catch (IOException e) {
        throw new SimalException(
            "Unable to write PTSW export file to temporary space");
      } finally {
        if (fw != null) {
          try {
            fw.close();
          } catch (IOException e) {
            logger.warn("Unable to close file", e);
          }
        }
      }
   
      repo.addProject(tmpFile.toURI().toURL(), tmpFile.toURI().toURL()
              .toExternalForm());
      int postProjectCount = repo.getAllProjects().size();
      logger.info("Imported " + (postProjectCount - preProjectCount)
            + " project records from PTSW");   
    }
}
TOP

Related Classes of uk.ac.osswatch.simal.importData.PTSWImport

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.