// Package eu.planets_project.ifr.core.storage.impl.oai
//
// Source code of eu.planets_project.ifr.core.storage.impl.oai.OAIDigitalObjectManagerKBImpl

package eu.planets_project.ifr.core.storage.impl.oai;

import java.io.ByteArrayOutputStream;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;

import eu.planets_project.ifr.core.storage.api.query.Query;
import eu.planets_project.ifr.core.storage.api.query.QueryValidationException;
import eu.planets_project.services.datatypes.Content;
import eu.planets_project.services.datatypes.DigitalObject;
import eu.planets_project.services.datatypes.Metadata;
import eu.planets_project.services.datatypes.DigitalObject.Builder;

import java.io.InputStream;

import java.net.*;
import java.io.*;
import java.util.*;


/**
* KB implementation of the OAI digital object manager.
*/
public class OAIDigitalObjectManagerKBImpl extends AbstractOAIDigitalObjectManagerImpl {
 
  /**
   * This is a buffer size and a max byte array size for reading data
   * from InputStream to byte array
   */
  public static final int BUFFER_SIZE = 1048576;
 
  private static final String RESOLVER_START = "<dc:identifier xsi:type=\"dcterms:URI\">";
  private static final String RESOLVER_END = "</dc:identifier>";
  private static final String POST_FORM_START = "method=\"post\" action=\"";
  private static final String POST_FORM_END = "\">";
  private static final String POST_FORM_NAME = "name=\"";
  private static final String POST_FORM_VALUE = "\" value=\"";
  private static final String LINK_START = "Open de <a href=\"";
  private static final String LINK_END = "\" target=\"_self\">publicatie";
  private static final String DOMAIN_NAME = "domain_name_place_holder"
  private static final String COMMENT_START = "<!--";
  private static final String COMMENT_END = "-->";
  private static final String AND_CHAR = "&";
  private static final String GLEICH_CHAR = "=";
  private static final String BRACES = "%22";
  private static final String METADATA_END = "</dc";
  private static final String METADATA_TYPE = "dcx";
 
  private URI baseRegistryURI = null;


  /**
   * This is an enumeration of the OAI article metadata
   */
  enum OaiMetadata {
       title, bibliographicCitation, creator, subject, abstrac, publisher,
       extent, uri, isPartOf, accessRights
      }
   
  /**
   * This is an array of the OAI article numbers
   */ 
  String [] Articles = {
         "1237818724132", "1237818828220", "1237818827273", "1237818707653", "1237818781294",
         "1237818757698", "1237818353465", "1237818740969", "1237818819576", "1237818655074",
         "1262698022536"
        };
     
  /**
   * The cache map binds URI with the digital object.
   */
  private static Map<URI, DigitalObject> leafMap = new HashMap<URI, DigitalObject>();
 
    /**
     * The manager control thread.
     */
//    private static ManagerControl mc;

  /**
   * This is a cache for list method.
   */
  private static ArrayList<URI> uriList = new ArrayList<URI>();


  /**
   * @param baseURL The base URL
   */
  public OAIDigitalObjectManagerKBImpl(String baseURL) {
    super(baseURL, "");
//        mc = new ManagerControl(this);
//        mc.start();
//        log.info("Manager control thread started.");
  }
 

  /**
   * This method evaluates original HTTP URI from registry URI
   * @param uri The registry URI
   * @return The original HTTP URI
   */
  public URI getOriginalUri(URI keyUri) {
    URI res = keyUri;
    try {
      if (keyUri != null) {
          log.info("OAIDigitalObjectManagerKBImpl getOriginalUri() find out the original key for uri: " + keyUri);
          for(URI uri : leafMap.keySet()) {
            if (uri.toString().contains(keyUri.toString())) {
              res = uri;
              log.info("OAIDigitalObjectManagerKBImpl getOriginalUri() found: " + res);
              break;
            }
          }
      }

    } catch (Exception e) {
      log.info("OAIDigitalObjectManagerKBImpl getOriginalUri() error: " + e.getMessage());       
    }

    return res;   
  }
 
 
  /**
   * {@inheritDoc}
   * @see eu.planets_project.ifr.core.storage.api.DigitalObjectManager#retrieve(java.net.URI)
   */
  public DigitalObject retrieve(URI pdURI) throws DigitalObjectNotFoundException {
      long starttime = System.currentTimeMillis();
      log.info("OAIDigitalObjectManagerKBImpl retrieve() starttime: " + starttime);
    try {
      log.log(Level.INFO, "OAIDigitalObjectManagerKBImpl retrieve() uri: " + pdURI);
     
      URI originalURI = getOriginalUri(pdURI);
     
      // return digital object if it exists in the map
      if (originalURI != null && leafMap.containsKey(originalURI)) {
          log.info("OAIDigitalObjectManagerKBImpl retrieve() already exist in map uri: " + originalURI);
          long endtime = System.currentTimeMillis();
          log.info("OAIDigitalObjectManagerKBImpl retrieve() timediff: " + (endtime - starttime));
        return leafMap.get(originalURI);
      }
     
        long endtime = System.currentTimeMillis();
        log.info("OAIDigitalObjectManagerKBImpl retrieve() error1: NoHTTP URL available." + " timediff: " + (endtime - starttime));
      throw new DigitalObjectNotFoundException("No HTTP URL available for this record");
    } catch (Exception e) {
      throw new DigitalObjectNotFoundException(e.getMessage());
    }
  }


    /* (non-Javadoc)
     * @see eu.planets_project.ifr.core.storage.api.DigitalObjectManager#list(java.net.URI, eu.planets_project.ifr.core.storage.api.query.Query)
     */
    public List<URI> list(URI pdURI, Query q) throws QueryValidationException {
      return list(pdURI, null);
    }


    /**
     * This method retrieves article metadata from the metadata repository response
     * @param emd
     * @param resolver
     * @return data for particular metadata field
     */
    private String retrieveOaiMetadata(OaiMetadata emd, String resolver) {
      String res = "";
      if (resolver != null && resolver.indexOf(emd.name()) > 0) {
        res = resolver.substring(resolver.indexOf(emd.name()) + emd.name().length() + 1,
                             resolver.indexOf(METADATA_END, resolver.indexOf(emd.name())));
        if (emd.equals(OaiMetadata.abstrac) && res != null && res.length() > 0) {
          res = res.substring(1);
        }
      }
    log.log(Level.INFO, "retrieveOaiMetadata() res: " + res + ", enum: " + emd);
      return res;
    }
   

    /* (non-Javadoc)
     * @see eu.planets_project.ifr.core.storage.impl.oai.AbstractOAIDigitalObjectManagerImpl#list(java.net.URI)
     */
    public List<URI> list(URI pdURI) {
      if (pdURI != null) {
        if (uriList != null && uriList.size() > 0) {
          return uriList;
        } else {
            long starttime = System.currentTimeMillis();
            log.info("OAIDigitalObjectManagerKBImpl list() starttime: " + starttime);
          // OAI hierarchy is flat (no sub-directories)
          ArrayList<URI> resultList = new ArrayList<URI>();
          for (int i = 0 ; i < Articles.length ; i++) {
            String resolver = transferData(OAIDigitalObjectManagerKBBase.DEFAULT_BASE_URL + BRACES +
                Articles[i] + BRACES);
  //            log.log(Level.INFO, "test() init resolver[" + i +  "]: " + resolver);
            String resolverLink = resolver.substring(resolver.indexOf(RESOLVER_START) + RESOLVER_START.length(),
                            resolver.indexOf(RESOLVER_END));
              log.log(Level.INFO, "test() resolverLink[" + i +  "]: " + resolverLink);
              try {
                if (resolverLink != null) {
              // Get an intermediate HTML page and the publication link
              String publicationLink = retrieveIntermediateHtmlPage(resolverLink);
                  resultList.add(URI.create(publicationLink));
   
                  // Retrieve metadata
                  String title = "";
                  List<Metadata> metadataList = new ArrayList<Metadata>(0);
                  for (OaiMetadata emd : OaiMetadata.values()) {
                    String md = retrieveOaiMetadata(emd, resolver);
                    if (md != null && md.length() > 0) {
                       Metadata metadata = new Metadata(URI.create(METADATA_TYPE), emd.name(), md);
                       metadataList.add(metadata);
                  // Title
                  if (metadata.getName().equalsIgnoreCase(OaiMetadata.title.name())) {
                    title = metadata.getContent();
                  }
                    }
                  }
                             
              if (publicationLink != null && publicationLink.toString().length() > 0) {
                Builder builder = new DigitalObject.Builder(Content.byReference(URI.create(publicationLink).toURL()));
                builder.title(title);
                String filename = "";
                      if(publicationLink != null) {
                          filename = URI.create(publicationLink).getPath();
                          log.info("OAIDigitalObjectManagerKBImpl list() filename: " + filename);
                          if(filename != null) {
                              String[] parts = filename.split("/");
                              if( parts != null && parts.length > 0 )
                                filename = parts[parts.length-1];
                          }
                      }
                      log.info("OAIDigitalObjectManagerKBImpl list() filename: " + filename +
                          ", pdURI.toString(): " + pdURI.toString() + ", publicationLink: " + publicationLink);

                       URI permanentUri = URI.create(getBaseRegistryURI() +"/"+ filename).normalize();           
                builder.permanentUri(permanentUri);
                builder.metadata(metadataList.toArray(new Metadata[]{}));
               
                  long endtime = System.currentTimeMillis();
                  log.info("OAIDigitalObjectManagerKBImpl list() timediff: " + (endtime - starttime));
                DigitalObject o = builder.build();
                if (publicationLink != null && !leafMap.containsKey(publicationLink)) {
                    log.info("OAIDigitalObjectManagerKBImpl list() add to map uri: " + publicationLink);
                  leafMap.put(URI.create(publicationLink), o);
                }
              }
                }
              } catch (Exception e) {
              log.info("OAIDigitalObjectManagerKBImpl list() error: " + e.getMessage());               
              }
          }
          uriList = resultList;
            return resultList;
        }
      } else {
        return new ArrayList<URI>();
      }
    }
   
   
    /**
     * This method removes comments from the HTML page.
     * @param str The HTML page source code
     * @return The HTML source code without comments
     */
    private static String removeComments(String str) {
      String res = str;
      if (str.indexOf(COMMENT_START) > 0) {
        String tmp = str.substring(0, str.indexOf(COMMENT_START)) + str.substring(str.indexOf(COMMENT_END) + COMMENT_END.length());
//      log.log(Level.INFO, "removeComments tmp: " + tmp);
        res = removeComments(tmp);
      }
    log.log(Level.INFO, "removeComments return res: " + res);
      return res;
    }
   
   
    /**
     * This method transfers bytes from InputStream to string
     * @param path The path to the server
     * @return data as a string
     */
    private static String transferData(String path) {
      String res = "";
     
    try {
      URL url = URI.create(path).toURL();
      InputStream is = url.openStream();
 
       ByteArrayOutputStream out = new ByteArrayOutputStream();
       byte[] buf = new byte[BUFFER_SIZE];
       int len;
       log.log(Level.INFO, "##### inputstream available: " + is.available());
       while ((len = is.read(buf)) > 0)
       {
          out.write(buf, 0, len);
       }
       log.log(Level.INFO, "##### buf length: " + len + ", out.len: " + out.size());
       byte[] byteContent = out.toByteArray();
           log.log(Level.INFO, "evaluateContent() byteContent.length: " + byteContent.length);
       out.close();
           is.close();
           res = new String(byteContent);
    } catch (Exception e) {
      log.info("OAIDigitalObjectManagerKBImpl error: " + e.getMessage());
    }
   
    return res;
    }


    /**
     * This method creates a post form request
     * @param path The path to the server
     * @param parameterList The form parameter
     * @return The server response
     */
    private static String sendPostRequest(String path, ArrayList<String> parameterList) {
      String res = "";
     
    try {
      URL url = URI.create(path).toURL();
      // Construct data
      String data = "";
      Iterator<String> i = parameterList.iterator();
      while (i.hasNext()) {
        if (data.length() == 0) {
          data = i.next();
        } else {
          data = data + AND_CHAR + i.next();         
        }
        log.log(Level.INFO, "sendPostRequest() data: " + data);
      }

      // Send data
      URLConnection conn = url.openConnection();
      conn.setDoOutput(true);
      OutputStreamWriter wr = new OutputStreamWriter(conn.getOutputStream());
      wr.write(data);
      wr.flush();
     
      // Get the response
      BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream()));
      String line;
      while ((line = rd.readLine()) != null) {
        res = res + line;
        log.log(Level.INFO, "sendPostRequest() line: " + line);     
        }
      wr.close();
      rd.close();
    } catch (Exception e) {
      log.log(Level.INFO, "sendPostRequest() error: " + e.getMessage());     
    }
   
    return res;
    }
   
   
    /**
     * This method retrieves the intermediate HTML page from the resolver link.
     * @param resolver link
     * @return intermediate HTML page source code
     */
    private static String retrieveIntermediateHtmlPage(String resolver) {
      String res = "";
     
      // Get post form with URL and parameter to request an intermediate HTML page
    String postForm = transferData(resolver);
    log.log(Level.INFO, "test() postForm: " + postForm);   

    postForm = removeComments(postForm);
    log.log(Level.INFO, "test() after removeComments postForm: " + postForm);
    String postFormLink = postForm.substring(postForm.indexOf(POST_FORM_START) + POST_FORM_START.length(),
        postForm.indexOf(POST_FORM_END, postForm.indexOf(POST_FORM_START) + POST_FORM_START.length()));

    ArrayList<String> parameterList = new ArrayList<String>(0);
    ArrayList<String> tmpParameterList = new ArrayList<String>(Arrays.asList(postForm.split(POST_FORM_NAME)));

    Iterator<String> i = tmpParameterList.iterator();
    while (i.hasNext()) {
      String line = i.next();
//      log.log(Level.INFO, "test() line: " + line);
      if (line.contains(POST_FORM_VALUE)) {
        try {
          String name = URLEncoder.encode(line.substring(0, line.indexOf(POST_FORM_VALUE)), "UTF-8");
          String value = URLEncoder.encode(line.substring(line.indexOf(POST_FORM_VALUE) + POST_FORM_VALUE.length()
              , line.indexOf(POST_FORM_END)), "UTF-8");
 
          parameterList.add(name + GLEICH_CHAR + value);
          log.log(Level.INFO, "test() param: " + name + GLEICH_CHAR + value);
        } catch (Exception e) {
          log.log(Level.INFO, "retrieveIntermediateHtmlPage(): " + e.getMessage());         
        }
      }
    }
   
    // Get an intermediate HTML page   
    String intermediateHtml = sendPostRequest(postFormLink, parameterList);
    log.log(Level.INFO, "test() intermediateHtml: " + intermediateHtml);
   
    // Retrieve publication link
    if (intermediateHtml != null && intermediateHtml.length() > 0) {
      res = DOMAIN_NAME + intermediateHtml.substring(intermediateHtml.indexOf(LINK_START) + LINK_START.length(),
          intermediateHtml.indexOf(LINK_END));
      }

    log.log(Level.INFO, "test() retrieveIntermediateHtmlPage() res: " + res);
    return res;
    }
   
   
    /*
  public static void main(String[] args) {
      long starttime = System.currentTimeMillis();
      log.info("OAIDigitalObjectManagerKBImpl retrieve() starttime: " + starttime);
   
    OAIDigitalObjectManagerKBImpl oaiImpl = new OAIDigitalObjectManagerKBImpl(OAIDigitalObjectManagerKBBase.DEFAULT_BASE_URL);   
   
    // ListIdentifiers
    System.out.println("starting query.");
    List<URI> identifiers = oaiImpl.list(null);
    System.out.println(identifiers.size() + " found.");
   
    // GetRecord for each identifier
    for (URI id : identifiers) {
      try {
        DigitalObject dob = oaiImpl.retrieve(id);
        System.out.println("retrieved file: " + dob.getTitle());
        System.out.println("retrieved metadata size: " + dob.getMetadata().size());
      } catch (DigitalObjectNotFoundException e) {
        System.out.println("couldn't retrieve file: " + e.getMessage());
      }
    }
    System.out.println("done.");
  }
  */
   
   
    public URI storeAsNew(URI pdURI, DigitalObject digitalObject) throws eu.planets_project.ifr.core.storage.api.DigitalObjectManager.DigitalObjectNotStoredException {
        throw new DigitalObjectNotStoredException("Storing not supported by this implementation.");
    }
   
    public URI storeAsNew(DigitalObject digitalObject) throws eu.planets_project.ifr.core.storage.api.DigitalObjectManager.DigitalObjectNotStoredException {
        throw new DigitalObjectNotStoredException("Storing not supported by this implementation.");
    }

    public URI updateExisting(URI pdURI, DigitalObject digitalObject) throws eu.planets_project.ifr.core.storage.api.DigitalObjectManager.DigitalObjectNotStoredException, eu.planets_project.ifr.core.storage.api.DigitalObjectManager.DigitalObjectNotFoundException {
        throw new DigitalObjectNotStoredException("Storing not supported by this implementation.");
    }

   
    /**
     * This class manages the cache of retrieved digital objects.
     * @author GrafR
     *
     */
    @SuppressWarnings("unused")
  private class ManagerControl extends Thread {
       
        /**
         * Repository implementation
         */
        OAIDigitalObjectManagerKBImpl impl;
       
        /**
         * The time between consistency check
         */
        long sleeptime = 1200000;

        long starttime = 0;
        int counter = 0;
       
        /**
         * @param impl The repository implementation
         */
        public ManagerControl(OAIDigitalObjectManagerKBImpl _impl) {
            this.impl = _impl;
          starttime = System.currentTimeMillis();
          log.info("ManagerControl() starttime: " + starttime);
        }

        public void run() {
            while (true) {
                try {
                    log.info("ManagerControl run().");
                    Thread.sleep(sleeptime);
                    // check cache entries. Update digital object if necessary.
                    Iterator<URI> leafIterator = leafMap.keySet().iterator();
                    while(leafIterator.hasNext()) {
                        URI currentUri = leafIterator.next();
                        log.info("ManagerControl check uri: " + currentUri);   
                        try {
                          DigitalObject presentObj = impl.retrieve(currentUri);
                          if (!presentObj.equals(leafMap.get(currentUri))) {
                            leafMap.put(currentUri, presentObj);
                          }
                        } catch (DigitalObjectNotFoundException e) {
                            log.info("ManagerControl digital object not found for uri: " + currentUri);   
                          long removetime = System.currentTimeMillis();
                          log.info("ManagerControl() difftime: " + (removetime - starttime) + ", counter: " + counter);
                            log.info("ManagerControl remove from cache for uri: " + currentUri);   
                            // remove it if it is not more present in the repository.
                            if (leafMap.containsKey(currentUri)) {
                              leafMap.remove(currentUri);
                            }
                        }
                    }
                } catch (InterruptedException e) {
                    log.info("ManagerControl error: " + e.getMessage());
                    e.printStackTrace();
                }
                counter++;
            }
        }
    }


  public URI getBaseRegistryURI() {
    return baseRegistryURI;
  }


  public void setBaseRegistryURI(URI _baseRegistryURI) {
    baseRegistryURI = _baseRegistryURI;
  }
 
}
// TOP
//
// Related classes of eu.planets_project.ifr.core.storage.impl.oai.OAIDigitalObjectManagerKBImpl
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.