Source Code of org.fao.geonet.kernel.harvest.harvester.z3950.Harvester

//=============================================================================
//===  Copyright (C) GeoNetwork
//===
//===  This program is free software; you can redistribute it and/or modify
//===  it under the terms of the GNU General Public License as published by
//===  the Free Software Foundation; either version 2 of the License, or (at
//===  your option) any later version.
//===
//===  This program is distributed in the hope that it will be useful, but
//===  WITHOUT ANY WARRANTY; without even the implied warranty of
//===  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//===  General Public License for more details.
//===
//===  You should have received a copy of the GNU General Public License
//===  along with this program; if not, write to the Free Software
//===  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//===  Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//===  Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================

package org.fao.geonet.kernel.harvest.harvester.z3950;

import com.google.common.base.Optional;
import jeeves.server.ServiceConfig;
import jeeves.server.UserSession;
import jeeves.server.context.ServiceContext;

import org.fao.geonet.GeonetContext;
import org.fao.geonet.Logger;
import org.fao.geonet.constants.Edit;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.domain.ISODate;
import org.fao.geonet.domain.Metadata;
import org.fao.geonet.domain.MetadataCategory;
import org.fao.geonet.domain.MetadataType;
import org.fao.geonet.kernel.DataManager;
import org.fao.geonet.kernel.harvest.BaseAligner;
import org.fao.geonet.kernel.harvest.harvester.CategoryMapper;
import org.fao.geonet.kernel.harvest.harvester.GroupMapper;
import org.fao.geonet.kernel.harvest.harvester.HarvestError;
import org.fao.geonet.kernel.harvest.harvester.HarvestResult;
import org.fao.geonet.kernel.harvest.harvester.IHarvester;
import org.fao.geonet.kernel.harvest.harvester.Privileges;
import org.fao.geonet.kernel.harvest.harvester.UUIDMapper;
import org.fao.geonet.kernel.search.MetaSearcher;
import org.fao.geonet.kernel.search.SearchManager;
import org.fao.geonet.kernel.setting.SettingManager;
import org.fao.geonet.repository.MetadataCategoryRepository;
import org.fao.geonet.repository.MetadataRepository;
import org.fao.geonet.repository.Updater;
import org.fao.geonet.services.main.Info;
import org.fao.geonet.utils.Xml;
import org.jdom.DocType;
import org.jdom.Document;
import org.jdom.Element;

import javax.annotation.Nonnull;
import java.util.*;

//=============================================================================

class Harvester extends BaseAligner implements IHarvester<Z3950ServerResults> {
  private UUIDMapper localUuids;
  private final DataManager dataMan;
  private final SearchManager searchMan;
  private final SettingManager settingMan;

  // --------------------------------------------------------------------------
  // ---
  // --- Constructor
  // ---
  // --------------------------------------------------------------------------
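  /**
   * Creates the harvester, resolving the {@link SearchManager},
   * {@link DataManager} and {@link SettingManager} beans from the
   * GeoNetwork handler context.
   */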

  public Harvester(Logger log, ServiceContext context, Z3950Params params) {
    GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
    this.context = context;
    this.log = log;
    this.searchMan = gc.getBean(SearchManager.class);
    this.dataMan = gc.getBean(DataManager.class);
    this.settingMan = gc.getBean(SettingManager.class);
    this.params = params;
  }

  // ---------------------------------------------------------------------------
  // ---
  // --- API methods
  // ---
  // ---------------------------------------------------------------------------
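  /**
   * Runs one harvest cycle. Records previously harvested by this node are
   * removed, a Z39.50 search is issued against the repositories configured in
   * the harvester parameters, and the hits are presented in pages of
   * {@code groupSize} records. Each record is optionally transformed with an
   * import XSLT, inserted with the configured privileges and categories,
   * optionally validated, and indexed. Per-repository counts are accumulated
   * in the returned {@link Z3950ServerResults}.
   */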

  public Z3950ServerResults harvest(final Logger log) throws Exception {
    Set<String> newUuids = new HashSet<String>();

    int groupSize = 10;

    this.log = log;
    log.info("Retrieving remote metadata information:" + params.uuid);

    Z3950ServerResults serverResults = new Z3950ServerResults();

    // --- Clean all before harvest : Remove/Add mechanism
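    // localUuids maps the uuid of every record previously harvested by this
    // node (harvest uuid = params.uuid) to its local metadata id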
    localUuids = new UUIDMapper(context.getBean(MetadataRepository.class), params.uuid);

    // --- remove old metadata
    for (String uuid : localUuids.getUUIDs()) {
      String id = localUuids.getID(uuid);
            if(this.log.isDebugEnabled()) log.debug("  - Removing old metadata before update with id: " + id);
      dataMan.deleteMetadataGroup(context, id);
      serverResults.locallyRemoved++;
    }


        if (serverResults.locallyRemoved > 0) {
            dataMan.flush();
        }

    // --- Search remote node
    MetaSearcher s = searchMan.newSearcher(SearchManager.Z3950, Geonet.File.SEARCH_Z3950_CLIENT);

    ServiceConfig config = new ServiceConfig();

    Element request = new Element("request");

    // --- Z39.50 servers from harvest params
    for (String id : params.getRepositories()) {
      request.addContent(new Element(Geonet.SearchResult.SERVERS).setText(id));
    }

    // --- Z39.50 query from harvest params
    request.addContent(new Element(Geonet.SearchResult.ZQUERY)
        .setText(params.query));

    // --- don't get html presentations (get them later)
    request.addContent(new Element(Geonet.SearchResult.SERVERHTML)
        .setText("off"));

    // --- set timeout to be 100 seconds
    request.addContent(new Element(Geonet.SearchResult.TIMEOUT).setText("100"));

    // --- set hitsPerPage
    request.addContent(new Element(Geonet.SearchResult.HITS_PER_PAGE).setText(groupSize + ""));

    // --- do the search
    s.search(context, request, config);

    if (s.getSize() == 0) {
      log.error("Search failed or returned 0 results, trying again");
      s.search(context, request, config);
      if (s.getSize() == 0) {
        throw new Exception("Bad luck, Search failed or returned 0 results");
      }
    }

        if(log.isDebugEnabled()) log.debug("Search returned "+s.getSize()+" hits");

    // -- process the hits in groups of groupSize
    int numberOfHits = Math.min(Integer.parseInt(params.maximumHits),s.getSize());
    // -- add from and to placeholders to request

    request.addContent(new Element("from"));
    request.addContent(new Element("to"));

    Element repositories = new Info().getZRepositories(context, settingMan);
        if(log.isDebugEnabled()) {
            log.debug("repos "+Xml.getString(repositories));
        }

    // -- build a map of collection code versus repository name for
    // -- assigning the categories
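    // -- keys are collection codes of the form "serverCode:code"; catCodes
    // -- additionally maps each code to a sanitised category name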
    Map <String,String> codes = new HashMap<String,String>();
    Map <String,String> catCodes = new HashMap<String,String>();

        final MetadataCategoryRepository categoryRepository = this.context.getBean(MetadataCategoryRepository.class);
        // -- add new category for each repository
    boolean addcateg = false;
    for (String repo : params.getRepositories()) {
      Element repoElem = Xml.selectElement(repositories, "record[id='"+repo+"']");
      if (repoElem != null) {
        Element repoId  = repoElem.getChild("id");
        String repoName = repoElem.getChildText("name");
        codes.put(repoId.getAttributeValue("serverCode")+":"+repoId.getAttributeValue("code"), repoName);
        // create a result holder for this repository
        serverResults.getServerResult(repoName);

        // sanitize the name of the category
        String categName = repoName.replaceAll("[^\\w]","");
        categName = categName.toLowerCase();
        catCodes.put(repoId.getAttributeValue("serverCode")+":"+repoId.getAttributeValue("code"), categName);

        if (categoryRepository.findOneByNameIgnoreCase(categName) == null) {
                    MetadataCategory category = new MetadataCategory();
                    category.setName(categName);
                    categoryRepository.save(category);
          addcateg = true;
        }
      }
    }

    if (addcateg) {
            categoryRepository.flush();
        }

    // --- return only maximum hits as directed by the harvest params
    int nrGroups = (numberOfHits / groupSize) + 1;
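    // -- note: with (numberOfHits / groupSize) + 1 groups the last window is
    // -- empty (lower > upper) when numberOfHits is an exact multiple of
    // -- groupSize; the searcher is assumed to return no documents in that case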
    for (int i = 1; i <= nrGroups; i++) {
      int lower = ((i - 1) * groupSize) + 1;
      int upper = Math.min((i * groupSize), numberOfHits);
      request.getChild("from").setText("" + lower);
      request.getChild("to").setText("" + upper);

      // --- Loading results
      List<Document> list = s.presentDocuments(context, request, config);

      // --- Loading categories and groups
      localCateg = new CategoryMapper(context);
      localGroups = new GroupMapper(context);

            if(log.isDebugEnabled())
                log.debug("There are "+(list.size()-1)+" children in the results ("+lower+" to "+upper+")");

      boolean transformIt = false;
      String thisXslt = context.getAppPath() + Geonet.Path.IMPORT_STYLESHEETS + "/";
      if (!params.importXslt.equals("none")) {
        thisXslt = thisXslt + params.importXslt;
        transformIt = true;
      }

      // --- For each record....
      for (Document doc : list) {
        Element md = doc.getRootElement();
        String eName = md.getQualifiedName();
        if (eName.equals("summary")) continue;

        // -- Remove existing geonet:info children as for example
        // -- GeoNetwork Z39.50 server return when full mode
        // -- an extra element with server info not needed
        // -- once harvested
        String colCode = "";
        Element info = md.getChild(Edit.RootChild.INFO, Edit.NAMESPACE);
        if (info != null) {
          String serverCode = info.getChildText("server");
          int colPos = serverCode.indexOf(':');
          if (colPos != -1) {
            colCode = serverCode.substring(0,colPos)+":"+info.getChildText("collection");
          }
        }
        md.removeChildren(Edit.RootChild.INFO, Edit.NAMESPACE);
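        // colCode ("serverCode:collection") is built to match the keys of the
        // codes map above, so the lookup resolves the source repository name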
        String repoName = codes.get(colCode);
                if(log.isDebugEnabled()) log.debug("Processing record from server "+repoName);
        HarvestResult result = serverResults.getServerResult(repoName);
        result.totalMetadata++;

        if (eName.equals("error")) {
          log.error("JZKit could not retrieve record - returned "+Xml.getString(md));
          result.unretrievable++;
          continue;
        }

        // transform using importxslt if not none
        if (transformIt) {
          try {
                        if(log.isDebugEnabled()) log.debug("Before transform: "+Xml.getString(md));
            md = Xml.transform(md, thisXslt);
                        if(log.isDebugEnabled()) log.debug("After transform: "+Xml.getString(md));
          } catch (Exception e) {
              HarvestError error = new HarvestError(e, log);
              error.setDescription("Cannot transform XML, ignoring. Error was: "+e.getMessage());
            this.errors.add(error);
            error.printLog(log);
            result.badFormat++;
            continue; // skip this one
          }
        }

        // detect schema, extract uuid and add
        String schema = dataMan.autodetectSchema(md, null);
        if (schema == null) {
          log.warning("Skipping metadata with unknown schema.");
          result.unknownSchema++;
          continue;
        }

        String uuid = null;
        try {
          uuid = dataMan.extractUUID(schema, md);
        } catch (Exception e) {
                    HarvestError error = new HarvestError(e, log);
                    error.setDescription("Unable to extract UUID. " + e.getMessage());
                    this.errors.add(error);
                    error.printLog(log);
        }

        if (uuid == null || uuid.equals("")) {
          log.warning("Skipping metadata due to failure extracting uuid (uuid null or empty).");
          result.unretrievable++;
          continue;
        }

        log.info("  - Adding metadata with " + uuid);

        //--- generate a new metadata id
   
        String docType = "";
        if (!transformIt && (doc.getDocType() != null)) {
          docType = Xml.getString(doc.getDocType());
        }

        //--- check for duplicate uuid - violates constraints on metadata table
        //--- if we attempt insert
        boolean alreadyAdded = !newUuids.add(uuid);
        boolean alreadyInDb  = (dataMan.getMetadataId(uuid) != null);
        if (alreadyAdded || alreadyInDb) {
          log.error("Uuid "+uuid+" already exists in this set/database - cannot insert");
          result.couldNotInsert++;
          continue;
        }

                final String id;
                //
                // insert metadata
                //
        try {
                    String groupOwner = "1", isTemplate = "n", title = null;
                    int owner = 1;
                    String category = null, createDate = new ISODate().toString(), changeDate = createDate;
                    boolean ufo = false, indexImmediate = false;
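                    // indexImmediate is false: the record is indexed explicitly
                    // below, once privileges and categories have been assigned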
                    id = dataMan.insertMetadata(context, schema, md, uuid, owner, groupOwner, params.uuid,
                            isTemplate, docType, category, createDate, changeDate, ufo, indexImmediate);

                }
                catch (Exception e) {
                    HarvestError error = new HarvestError(e, log);
                    error.setDescription("Unable to insert metadata. "+e.getMessage());
                    this.errors.add(error);
                    error.printLog(log);
          result.couldNotInsert++;
          continue;
        }

                addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context, log);
                context.getBean(MetadataRepository.class).update(Integer.parseInt(id), new Updater<Metadata>() {
                    @Override
                    public void apply(@Nonnull Metadata entity) {
                        addCategories(entity, params.getCategories(), localCateg, context, log, null);
                    }
                });


                final Integer iId = Integer.valueOf(id);
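                // mark the record as non-template metadata and flag it as
                // harvested by this node (params.uuid / params.name)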
                dataMan.setTemplateExt(iId, MetadataType.METADATA);
        dataMan.setHarvestedExt(iId, params.uuid, Optional.of(params.name));

        // validate it here if requested
        if (params.validate) {
          Document docVal;
          if (!transformIt && (doc.getDocType() != null)) {
            docVal = new Document(md, (DocType)doc.getDocType().detach());
          } else {
            docVal = new Document(md);
          }

          if (!dataMan.doValidate(schema, id, docVal, context.getLanguage())) {
            result.doesNotValidate++;
          }
        }

                dataMan.flush();

                dataMan.indexMetadata(id, false);

                result.addedMetadata++;
            }
        }

    return serverResults;
  }

  // ---------------------------------------------------------------------------
  // ---
  // --- Variables
  // ---
  // ---------------------------------------------------------------------------

  private Logger log;
  private final Z3950Params params;
  private ServiceContext context;
  private CategoryMapper localCateg;
  private GroupMapper localGroups;
    /**
     * Contains a list of errors accumulated during the execution of this harvest.
     */
    private List<HarvestError> errors = new LinkedList<HarvestError>();
  @Override
  public List<HarvestError> getErrors() {
    return errors;
  }
}