Package org.fao.geonet.kernel.harvest.harvester.webdav

Source Code of org.fao.geonet.kernel.harvest.harvester.webdav.Harvester

//=============================================================================
//===  Copyright (C) 2001-2007 Food and Agriculture Organization of the
//===  United Nations (FAO-UN), United Nations World Food Programme (WFP)
//===  and United Nations Environment Programme (UNEP)
//===
//===  This program is free software; you can redistribute it and/or modify
//===  it under the terms of the GNU General Public License as published by
//===  the Free Software Foundation; either version 2 of the License, or (at
//===  your option) any later version.
//===
//===  This program is distributed in the hope that it will be useful, but
//===  WITHOUT ANY WARRANTY; without even the implied warranty of
//===  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//===  General Public License for more details.
//===
//===  You should have received a copy of the GNU General Public License
//===  along with this program; if not, write to the Free Software
//===  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//===  Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//===  Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================
package org.fao.geonet.kernel.harvest.harvester.webdav;

import com.google.common.base.Optional;
import jeeves.server.context.ServiceContext;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.Logger;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.domain.*;
import org.fao.geonet.exceptions.NoSchemaMatchesException;
import org.fao.geonet.kernel.DataManager;
import org.fao.geonet.kernel.SchemaManager;
import org.fao.geonet.kernel.harvest.BaseAligner;
import org.fao.geonet.kernel.harvest.harvester.*;
import org.fao.geonet.repository.HarvesterDataRepository;
import org.fao.geonet.repository.MetadataRepository;
import org.fao.geonet.repository.OperationAllowedRepository;
import org.fao.geonet.repository.Updater;
import org.fao.geonet.utils.Log;
import org.fao.geonet.utils.Xml;
import org.jdom.Element;
import org.jdom.JDOMException;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.UUID;

//=============================================================================

class Harvester extends BaseAligner implements IHarvester<HarvestResult> {
  //--------------------------------------------------------------------------
  //---
  //--- Constructor
  //---
  //--------------------------------------------------------------------------

  public Harvester(Logger log, ServiceContext context, WebDavParams params) {
    this.log    = log;
    this.context= context;
    this.params = params;

    result = new HarvestResult();

    GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
    dataMan = gc.getBean(DataManager.class);
    schemaMan = gc.getBean(SchemaManager.class);
  }

  //---------------------------------------------------------------------------
  //---
  //--- API methods
  //---
  //---------------------------------------------------------------------------
  @Override
  public HarvestResult harvest(Logger log) throws Exception {
    this.log = log;
        if(log.isDebugEnabled()) log.debug("Retrieving remote metadata information for : "+ params.name);
        RemoteRetriever rr = null;
        if (params.subtype.equals("webdav")) {
            rr = new WebDavRetriever();
        } else if (params.subtype.equals("waf")) {
            rr = new WAFRetriever();
        } else {
            throw new IllegalArgumentException(params.subtype + " is not one of webdav or waf");
        }
        try {
            Log.info(Log.SERVICE, "webdav harvest subtype : "+params.subtype);
            rr.init(log, context, params);
            List<RemoteFile> files = rr.retrieve();
            if(log.isDebugEnabled()) log.debug("Remote files found : "+ files.size());
            align(files);
        } finally { rr.destroy();
        }
    return result;
  }

  //---------------------------------------------------------------------------
  //---
  //--- Private methods
  //---
  //---------------------------------------------------------------------------

  private void align(final List<RemoteFile> files) throws Exception {
    log.info("Start of alignment for : "+ params.name);
    //-----------------------------------------------------------------------
    //--- retrieve all local categories and groups
    //--- retrieve harvested uuids for given harvesting node
    localCateg = new CategoryMapper(context);
    localGroups= new GroupMapper(context);
    localUris  = new UriMapper(context, params.uuid);

    //-----------------------------------------------------------------------
    //--- remove old metadata
    for (final String uri : localUris.getUris()) {
            if (!exists(files, uri)) {
                // only one metadata record created per uri by this harvester
                String id = localUris.getRecords(uri).get(0).id;
                if (log.isDebugEnabled()){
                    log.debug("  - Removing old metadata with local id:"+ id);
                }
                try {
                    dataMan.deleteMetadataGroup(context, id);
                } catch (Exception e) {
                    log.error("Error occurred while deleting metadata id");
                }
                dataMan.flush();
                result.locallyRemoved++;

            }
    }
    //-----------------------------------------------------------------------
    //--- insert/update new metadata

    for(RemoteFile rf : files) {
      result.totalMetadata++;
      List<RecordInfo> records = localUris.getRecords(rf.getPath());
      if (records == null) {
        addMetadata(rf);
      } else {
        // only one metadata record created per uri by this harvester
        updateMetadata(rf, records.get(0));
      }
    }
    log.info("End of alignment for : "+ params.name);
  }

  //--------------------------------------------------------------------------
  /** Returns true if the uri is present in the remote folder */
  private boolean exists(List<RemoteFile> files, String uri) {
    for(RemoteFile rf : files) {
      if (uri.equals(rf.getPath())) {
        return true;
      }
    }
    return false;
  }

  //--------------------------------------------------------------------------
  //---
  //--- Private methods : addMetadata
  //---
  //--------------------------------------------------------------------------
  /**
   *
   To determine the UUID we are going to use the following mechanism: 1.-
   * Look for the file identifier on the metadata xml 2.- If there is no file
   * identifier, then use the name of the file 3.- If there is a collision of
   * uuid with existent metadata, use a random one 4.- If we still don't have
   * a clear UUID, use a random one (backup plan)
   *
   **/
  private void addMetadata(RemoteFile rf) throws Exception {
    Element md = retrieveMetadata(rf);
    if (md == null) {
      return;
    }
    //--- schema handled check already done
    String schema = dataMan.autodetectSchema(md);


        // 1.- Look for the file identifier on the metadata xml
        String uuid = dataMan.extractUUID(schema,  md);

        // 2.- If there is no file identifier, then use the name of the file
        if (uuid == null) {
                String path = rf.getPath();
                int start = path.lastIndexOf("/") + 1;
                uuid = path.substring(start, path.length() - 4);
        }

        // 3.- If there is a collision of uuid with existent metadata, use a
        // random one
        if (dataMan.existsMetadataUuid(uuid)) {
                uuid = null;
        }

        // 4.- If we still don't have a clear UUID, use a random one (backup
        // plan)
        if (uuid == null) {
                uuid = UUID.randomUUID().toString();
                log.debug("  - Setting uuid for metadata with remote path : "
                                + rf.getPath());

                // --- set uuid inside metadata and get new xml
                try {
                        md = dataMan.setUUID(schema, uuid, md);
                } catch (Exception e) {
                        log.error("  - Failed to set uuid for metadata with remote path : "
                                        + rf.getPath());
                        errors.add(new HarvestError(e, this.log));
                        return;
                }
        }

        log.debug("  - Adding metadata with remote path : " + rf.getPath());



        if(log.isDebugEnabled()) log.debug("  - Adding metadata with remote path : "+ rf.getPath());

    //
        // insert metadata
        //
        String group = null, isTemplate = null, docType = null, title = null, category = null;
        boolean ufo = false, indexImmediate = false;

        // Get the change date from the metadata content. If not possible, get it from the file change date if available
        // and if not possible use current date
        String date = null;

        try {
            date = dataMan.extractDateModified(schema, md);
        } catch (Exception ex) {
            log.error("WebDavHarvester - addMetadata - Can't get metadata modified date for metadata uuid= " + uuid +
                    ", using current date for modified date");
            // WAF harvester, rf.getChangeDate() returns null
            if (rf.getChangeDate() != null) {
                date = rf.getChangeDate().getDateAndTime();
            }
        }

        String id = dataMan.insertMetadata(context, schema, md, uuid, Integer.parseInt(params.ownerId), group, params.uuid,
                isTemplate, docType, category, date, date, ufo, indexImmediate);


    int iId = Integer.parseInt(id);

    dataMan.setTemplateExt(iId, MetadataType.METADATA);
    dataMan.setHarvestedExt(iId, params.uuid, Optional.of(rf.getPath()));

        addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context, log);
        context.getBean(MetadataRepository.class).update(iId, new Updater<Metadata>() {
            @Override
            public void apply(@Nonnull Metadata entity) {
                addCategories(entity, params.getCategories(), localCateg, context, log, null);
            }
        });

        dataMan.flush();

        dataMan.indexMetadata(id, false);
    result.addedMetadata++;
  }
 
  //--------------------------------------------------------------------------

  private Element retrieveMetadata(RemoteFile rf) {
    try {
            if(log.isDebugEnabled()) log.debug("Getting remote file : "+ rf.getPath());
      Element md = rf.getMetadata(schemaMan);
            if(log.isDebugEnabled()) log.debug("Record got:\n"+ Xml.getString(md));

      String schema = dataMan.autodetectSchema(md);
      if (!params.validate || validates(schema, md)) {
        return (Element) md.detach();
      }

      log.warning("Skipping metadata that does not validate. Path is : "+ rf.getPath());
      result.doesNotValidate++;
    }
    catch (NoSchemaMatchesException e) {
        log.warning("Skipping metadata with unknown schema. Path is : "+ rf.getPath());
        result.unknownSchema++;
    }
    catch(JDOMException e) {
      log.warning("Skipping metadata with bad XML format. Path is : "+ rf.getPath());
      result.badFormat++;
    }
    catch(Exception e) {
      log.warning("Raised exception while getting metadata file : "+ e);
      result.unretrievable++;
    }
    //--- we don't raise any exception here. Just try to go on
    return null;
  }

  //--------------------------------------------------------------------------

  private boolean validates(String schema, Element md) {
    try {
      dataMan.validate(schema, md);
      return true;
    }
    catch (Exception e) {
      return false;
    }
  }

  //--------------------------------------------------------------------------
  //---
  //--- Private methods : updateMetadata
  //---
  //--------------------------------------------------------------------------

  private void updateMetadata(RemoteFile rf, RecordInfo record) throws Exception {
    if (!rf.isMoreRecentThan(record.changeDate)) {
            if(log.isDebugEnabled()) log.debug("  - Metadata XML not changed for path : "+ rf.getPath());
      result.unchangedMetadata++;
    }
    else {
            if(log.isDebugEnabled()) log.debug("  - Updating local metadata for path : "+ rf.getPath());
      Element md = retrieveMetadata(rf);
      if (md == null) {
        return;
      }
     
      //--- set uuid inside metadata (on metadata add it's created a new uuid ignoring fileIdentifier uuid).
            //--- In update we should use db uuid to update the xml uuid and keep in sych both.
            String schema = null;
            try {
                schema = dataMan.autodetectSchema(md);
               
                //Update only if different
                String uuid = dataMan.extractUUID(schema,  md);
                if (!record.uuid.equals(uuid)) {
                  md = dataMan.setUUID(schema, record.uuid, md);
                }
            } catch(Exception e) {
                log.error("  - Failed to set uuid for metadata with remote path : "+ rf.getPath());
                return;
            }

            //
            // update metadata
            //
            boolean validate = false;
            boolean ufo = false;
            boolean index = false;
            String language = context.getLanguage();

            // Get the change date from the metadata content. If not possible, get it from the file change date if available
            // and if not possible use current date
            String date = null;

            try {
                date = dataMan.extractDateModified(schema, md);
            } catch (Exception ex) {
                log.error("WebDavHarvester - updateMetadata - Can't get metadata modified date for metadata id= "
                        + record.id + ", using current date for modified date");
                // WAF harvester, rf.getChangeDate() returns null
                if (rf.getChangeDate() != null) {
                    date = rf.getChangeDate().getDateAndTime();
                }
            }

            final Metadata metadata = dataMan.updateMetadata(context, record.id, md, validate, ufo, index, language,
                    date, false);

            //--- the administrator could change privileges and categories using the
      //--- web interface so we have to re-set both
            OperationAllowedRepository repository = context.getBean(OperationAllowedRepository.class);
            repository.deleteAllByIdAttribute(OperationAllowedId_.metadataId, Integer.parseInt(record.id));
            addPrivileges(record.id, params.getPrivileges(), localGroups, dataMan, context, log);

            metadata.getCategories().clear();
            addCategories(metadata, params.getCategories(), localCateg, context, log, null);

            dataMan.flush();

            dataMan.indexMetadata(record.id, false);
      result.updatedMetadata++;
    }
  }

  public List<HarvestError> getErrors() {
    return errors;
  }

  //---------------------------------------------------------------------------
  //---
  //--- Variables
  //---
  //---------------------------------------------------------------------------

  private Logger log;
  private ServiceContext context;
  private WebDavParams params;
  private DataManager dataMan;
  private CategoryMapper localCateg;
  private GroupMapper localGroups;
  private UriMapper localUris;
  private HarvestResult result;
  private SchemaManager  schemaMan;
    private List<HarvestError> errors = new LinkedList<HarvestError>();
}

//=============================================================================

interface RemoteRetriever {
  public void init(Logger log, ServiceContext context, WebDavParams params);
  public List<RemoteFile> retrieve() throws Exception;
  public void destroy();
}

//=============================================================================

interface RemoteFile {
  public String  getPath();
  public ISODate getChangeDate();
  public Element getMetadata(SchemaManager  schemaMan) throws Exception;
  public boolean isMoreRecentThan(String localDate);
}

//=============================================================================
TOP

Related Classes of org.fao.geonet.kernel.harvest.harvester.webdav.Harvester

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.