// Package org.fao.geonet.kernel.harvest.harvester.geoPREST
//
// Source code of org.fao.geonet.kernel.harvest.harvester.geoPREST.Harvester

//=============================================================================
//===  Copyright (C) 2001-2013 Food and Agriculture Organization of the
//===  United Nations (FAO-UN), United Nations World Food Programme (WFP)
//===  and United Nations Environment Programme (UNEP)
//===
//===  This program is free software; you can redistribute it and/or modify
//===  it under the terms of the GNU General Public License as published by
//===  the Free Software Foundation; either version 2 of the License, or (at
//===  your option) any later version.
//===
//===  This program is distributed in the hope that it will be useful, but
//===  WITHOUT ANY WARRANTY; without even the implied warranty of
//===  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//===  General Public License for more details.
//===
//===  You should have received a copy of the GNU General Public License
//===  along with this program; if not, write to the Free Software
//===  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//===  Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//===  Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================

package org.fao.geonet.kernel.harvest.harvester.geoPREST;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLDecoder;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import jeeves.server.context.ServiceContext;

import org.apache.commons.lang.StringUtils;
import org.fao.geonet.Constants;
import org.fao.geonet.Logger;
import org.fao.geonet.domain.ISODate;
import org.fao.geonet.exceptions.BadSoapResponseEx;
import org.fao.geonet.exceptions.BadXmlResponseEx;
import org.fao.geonet.exceptions.OperationAbortedEx;
import org.fao.geonet.kernel.harvest.harvester.HarvestError;
import org.fao.geonet.kernel.harvest.harvester.HarvestResult;
import org.fao.geonet.kernel.harvest.harvester.IHarvester;
import org.fao.geonet.kernel.harvest.harvester.RecordInfo;
import org.fao.geonet.utils.GeonetHttpRequestFactory;
import org.fao.geonet.utils.Xml;
import org.fao.geonet.utils.XmlRequest;
import org.jdom.Element;



//=============================================================================

class Harvester implements IHarvester<HarvestResult>
{
  //--------------------------------------------------------------------------
  //---
  //--- Constructor
  //---
  //--------------------------------------------------------------------------

  public Harvester(Logger log, ServiceContext context, GeoPRESTParams params)
  {
    this.log    = log;
    this.context= context;
    this.params = params;

  }

  //---------------------------------------------------------------------------
  //---
  //--- API methods
  //---
  //---------------------------------------------------------------------------

  public HarvestResult harvest(Logger log) throws Exception
  {

      this.log = log;
    //--- perform all searches

    XmlRequest request = context.getBean(GeonetHttpRequestFactory.class).createXmlRequest(new URL(params.baseUrl+"/rest/find/document"));

    Set<RecordInfo> records = new HashSet<RecordInfo>();


        for (Search s : params.getSearches()) {
            try {
                records.addAll(search(request, s));
            } catch (Exception t) {
                log.error("Unknown error trying to harvest");
                log.error(t.getMessage());
                t.printStackTrace();
                errors.add(new HarvestError(t, log));
            } catch (Throwable t) {
                log.fatal("Something unknown and terrible happened while harvesting");
                log.fatal(t.getMessage());
                t.printStackTrace();
                errors.add(new HarvestError(t, log));
            }
        }

        if (params.isSearchEmpty()) {
            try {
                log.debug("Doing an empty search");
                records.addAll(search(request, Search.createEmptySearch()));
            } catch(Exception t) {
                log.error("Unknown error trying to harvest");
                log.error(t.getMessage());
                t.printStackTrace();
                errors.add(new HarvestError(t, log));
            } catch(Throwable t) {
                log.fatal("Something unknown and terrible happened while harvesting");
                log.fatal(t.getMessage());
                t.printStackTrace();
                errors.add(new HarvestError(t, log));
            }
        }

    log.info("Total records processed in all searches :"+ records.size());

    //--- align local node

    Aligner aligner = new Aligner(log, context, params);

    return aligner.align(records, errors);
  }

  //---------------------------------------------------------------------------

  /**
   * Does REST search request.
   */
  private Set<RecordInfo> search(XmlRequest request, Search s) throws Exception
  {
    request.clearParams();
 
    request.addParam("searchText", s.freeText);
    request.addParam("max", params.maxResults);
    Element response = doSearch(request);

    Set<RecordInfo> records = new HashSet<RecordInfo>();

    if (log.isDebugEnabled())
      log.debug("Number of child elements in response: " + response.getChildren().size());

    String rss = response.getName();
    if (!rss.equals("rss")) {
      throw new OperationAbortedEx("Missing 'rss' element in\n", Xml.getString(response));
    }

    Element channel = response.getChild("channel");
    if (channel == null) {
      throw new OperationAbortedEx("Missing 'channel' element in \n", Xml.getString(response));
    }

    @SuppressWarnings("unchecked")
        List<Element> list = channel.getChildren();

    for (Element record :list) {
      if (!record.getName().equals("item")) continue; // skip all the other crap
      RecordInfo recInfo = getRecordInfo((Element)record.clone());
      if (recInfo != null) records.add(recInfo);
    }

    log.info("Records added to result list : "+ records.size());

    return records;
  }

  //---------------------------------------------------------------------------

    private Element doSearch(XmlRequest request) throws OperationAbortedEx {
        try {
            log.info("Searching on : " + params.name);
            Element response = request.execute();
            if (log.isDebugEnabled()) {
                log.debug("Sent request " + request.getSentData());
                log.debug("Search results:\n" + Xml.getString(response));
            }
            return response;
        } catch (BadSoapResponseEx e) {
            errors.add(new HarvestError(e, log));
            throw new OperationAbortedEx("Raised exception when searching: "
                    + e.getMessage(), e);
        } catch (BadXmlResponseEx e) {
            errors.add(new HarvestError(e, log));
            throw new OperationAbortedEx("Raised exception when searching: "
                    + e.getMessage(), e);
        } catch (IOException e) {
            errors.add(new HarvestError(e, log));
            throw new OperationAbortedEx("Raised exception when searching: "
                    + e.getMessage(), e);
        }
    }

  //---------------------------------------------------------------------------

  private RecordInfo getRecordInfo(Element record)
  {
    if (log.isDebugEnabled()) log.debug("getRecordInfo : " + Xml.getString(record));

    String identif = "";

    // get uuid and date modified
    try {
      // uuid is in <guid> child
      String guidLink = record.getChildText("guid");
      if (guidLink != null) {
        guidLink = URLDecoder.decode(guidLink, Constants.ENCODING);
        identif = StringUtils.substringAfter(guidLink, "id=");
      }
      if (identif.length() == 0) {
        log.warning("Record doesn't have a uuid : "+ Xml.getString(record));
        return null; // skip this one
      }

      String modified = record.getChildText("pubDate");
      // modified is using in the form Mon, 04 Feb 2013 10:19:00 +1000
      // it must be converted to ISODate,
      // TODO: does it come in any other form??? Check geoportal stuff?
      Date modDate = sdf.parse(modified);
      modified = new ISODate(modDate.getTime(), false).toString();
      if (modified != null && modified.length() == 0) modified = null;

      if (log.isDebugEnabled())
        log.debug("getRecordInfo: adding "+identif+" with modification date "+modified);
      return new RecordInfo(identif, modified);
        } catch (UnsupportedEncodingException e) {
            HarvestError harvestError = new HarvestError(e, log);
            harvestError.setDescription(harvestError.getDescription() + "\n record: " + Xml.getString(record));
            errors.add(harvestError);
        } catch (ParseException e) {
            HarvestError harvestError = new HarvestError(e, log);
            harvestError.setDescription(harvestError.getDescription() + "\n record: " + Xml.getString(record));
            errors.add(new HarvestError(e, log));
        }

    // we get here if we couldn't get the UUID or date modified
    return null;

  }
 
    public List<HarvestError> getErrors() {
        return errors;
    }

  //---------------------------------------------------------------------------
  //---
  //--- Variables
  //---
  //---------------------------------------------------------------------------
  private Logger         log;
  private GeoPRESTParams params;
  private ServiceContext context;
  private SimpleDateFormat sdf = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z");
    /**
     * Contains a list of accumulated errors during the executing of this harvest.
     */
    private List<HarvestError> errors = new LinkedList<HarvestError>();
}

// =============================================================================
// TOP
//
// Related classes of org.fao.geonet.kernel.harvest.harvester.geoPREST.Harvester
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.