Package net.datacrow.core.services

Source Code of net.datacrow.core.services.SearchTask

/******************************************************************************
*                                     __                                     *
*                              <-----/@@\----->                              *
*                             <-< <  \\//  > >->                             *
*                               <-<-\ __ /->->                               *
*                               Data /  \ Crow                               *
*                                   ^    ^                                   *
*                              info@datacrow.net                             *
*                                                                            *
*                       This file is part of Data Crow.                      *
*       Data Crow is free software; you can redistribute it and/or           *
*        modify it under the terms of the GNU General Public                 *
*       License as published by the Free Software Foundation; either         *
*              version 3 of the License, or any later version.               *
*                                                                            *
*        Data Crow is distributed in the hope that it will be useful,        *
*      but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*           MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.             *
*           See the GNU General Public License for more details.             *
*                                                                            *
*        You should have received a copy of the GNU General Public           *
*  License along with this program. If not, see http://www.gnu.org/licenses  *
*                                                                            *
******************************************************************************/

package net.datacrow.core.services;

import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;

import net.datacrow.console.windows.onlinesearch.OnlineSearchForm;
import net.datacrow.core.objects.DcObject;
import net.datacrow.core.resources.DcResources;
import net.datacrow.core.services.plugin.IServer;
import net.datacrow.util.StringUtils;

import org.apache.log4j.Logger;

/**
* A search task performs the actual online search. The search task is used by the
* online search form (see {@link OnlineSearchForm}) and by processed such as the mass
* update.
*
* The search is performed in multiple steps.
* 1) First the online service (web page or web server) is queried using the input
*    of the user ({@link #getItemKeys()}).
* 2) For each result the item details are retrieved. See ({@link #run()}) and
*    {@link #getItems(String, boolean)}
* 3) The in step 2 retrieved items only contain the bare minimum of information.
*    When the user (or any other process) selects one of the items the full details
*    need to be retrieved ({@link #getItem(URL)})
*
* This class needs to be extended for specific implementations.
*
* @author Robert Jan van der Waals
*/
public abstract class SearchTask extends Thread {

    private static Logger logger = Logger.getLogger(SearchTask.class.getName());

    // retrieve minimal item details
    public static final int _ITEM_MODE_SIMPLE = 0;
    // retrieve full item details
    public static final int _ITEM_MODE_FULL = 1;
   
    private boolean isCancelled = false;
   
    protected IOnlineSearchClient listener;
   
    private int maximum = 200;
   
    private String input;
    private String query;
   
    // The currently used URL or address
    private String address;
    // The selected server
    private IServer server;
    // The selected search mode
    private SearchMode searchMode;
    // The selected region (EN, US, NL, ..)
    private Region region;
    // The selected item retrieval mode
    private int itemMode = _ITEM_MODE_SIMPLE;
   
    private DcObject client;
   
    /**
     * Creates the search task.
     * @param listener
     * @param server
     * @param region
     * @param mode
     * @param query
     */
    public SearchTask(IOnlineSearchClient listener, IServer server,
                      Region region, SearchMode mode, String query) {

    this.listener = listener;
    this.region = region;
    this.searchMode = mode;
    this.server = server;
    this.address = region != null ? region.getUrl() : server.getUrl();
    this.query = query;//StringUtils.normalize(query);
    this.input = query;
  }

    /**
     * Sets the service info. This information is set on every item. This way Data Crow
     * knows where the retrieved information originally came from.
     */
    protected final void setServiceInfo(DcObject dco) {
        String service =  server.getName() + " / " +
                         (region != null ? region.getCode() : "none") + " / " +
                         (searchMode != null ? searchMode.getDisplayName() : "none") + " / " +
                          "value=[" + query + "]";
        dco.setValue(DcObject._SYS_SERVICE, service);
    }

    /**
     * Sets the item retrieval mode: {@link #_ITEM_MODE_FULL} or {@link #_ITEM_MODE_SIMPLE}.
     */
    public final void setItemMode(int mode) {
        this.itemMode = mode;
    }

    public boolean isItemModeSupported() {
        return true;
    }
   
    /**
     * Returns the retrieval mode: {@link #_ITEM_MODE_FULL} or {@link #_ITEM_MODE_SIMPLE}.
     */
    public final int getItemMode() {
        return itemMode;
    }
   
    /**
     * Set the maximum amount of items to be retrieved.
     */
    public final void setMaximum(int maximum) {
        this.maximum = maximum;
    }
   
    /**
     * Cancel the search.
     */
    public final void cancel() {
        isCancelled = true;
    }

    /**
     * Indicates if the search was (attempted) to be canceled.
     */
    public final boolean isCancelled() {
        return isCancelled;
    }

    /**
     * The currently used URL or address.
     */
    public final String getAddress() {
        return address;
    }

    /**
     * The currently used search mode.
     * @see SearchMode
     */
    public final SearchMode getMode() {
        return searchMode;
    }

    /**
     * The currently used region
     * @see Region.
     */
    public final Region getRegion() {
        return region;
    }
   
    public DcObject getClient() {
        return client;
    }

    public void setClient(DcObject client) {
        this.client = client;
    }

    public void setQuery(String query) {
        this.query = query;
    }

    public void setMode(SearchMode searchMode) {
        this.searchMode = searchMode;
    }

    /**
     * The used query as specified by the user.
     */
    public String getQuery() {
        String s = StringUtils.normalize2(query);
           
        s = query.replaceAll(" ", getWhiteSpaceSubst());
        s = s.replaceAll("\n", "");
        s = s.replaceAll("\r", "");
       
        // replace the & character
        int idx = s.indexOf('&');
        while (idx > -1) {
            s = s.substring(0, s.indexOf('&')) + "%26" + s.substring(s.indexOf('&') + 1, s.length());
            idx = s.indexOf('&');
        }
           
        return s;
    }

    /**
     * The currently used server
     * @see IServer
     */
    public final IServer getServer() {
        return server;
    }

    /**
     * The maximum amount of items to be retrieved.
     */
    public final int getMaximum() {
        return maximum;
    }   

    /**
     * The character used to substitute white spaces from the query (see {@link #getQuery()}).
     * Should be overridden by specific implementations.
     */
    public String getWhiteSpaceSubst() {
        return "+";
    }

    /**
     * Queries for the specified item. The service information (see {@link #setServiceInfo(DcObject)})
     * is used to retrieve the information.
     * @param dco The item to be updated.
     * @return The retrieved item or null if no item could be found.
     * @throws Exception
     */
    public DcObject query(DcObject dco) throws Exception {
        String link = (String) dco.getValue(DcObject._SYS_SERVICEURL);
        if (link != null && link.length() > 0) {
            DcObject item = getItem(new URL(link));
            setServiceInfo(item);
            return item;
        }

        return null;
    }

    /**
     * Query for the item(s) using the web key.
     * Note that a key is can be a fully qualified URL, an external ID or something else.
     * @param key The item key (The specific implementation decides the meaning of a key)
     * @param full Indicates if the full details should be retrieved.
     */
    protected Collection<DcObject> getItems(Object key, boolean full) throws Exception {
        Collection<DcObject> items = new ArrayList<DcObject>();
        items.add(getItem(key, full));
        return items;
    }

    /**
     * Query for the item using the web key.
     * @param key The item key (The specific implementation decides the meaning of a key)
     * @param full Indicates if the full details should be retrieved.
     */
    protected abstract DcObject getItem(Object key, boolean full) throws Exception;
   
    /**
     * Query for the item via the URL
     * @param url The direct link to the external item details.
     */
    protected abstract DcObject getItem(URL url) throws Exception;
   
    /**
     * Get every web ID from the page. With these IDs it should be possible to
     * get to the detailed item information.
     * @return The item keys or an empty collection.
     */
    protected abstract Collection<Object> getItemKeys() throws Exception ;
   
    protected void preSearchCheck() {}
   
    /**
     * Here the actual search is performed. This is a standard implementation suited for
     * all online searches.
     */
    @Override
    public void run() {
       
        preSearchCheck();
       
        Collection<Object> keys = new ArrayList<Object>();

        listener.addMessage(DcResources.getText("msgConnectingToServer", getAddress()));

        try {
            keys.addAll(getItemKeys());
        } catch (Exception e) {
            listener.addError(DcResources.getText("msgCouldNotConnectTo", getServer().getName()));
            logger.error(e, e);
        }
       
        listener.processingTotal(keys.size());

        if (keys.size() == 0) {
            listener.addWarning(DcResources.getText("msgNoResultsForKeywords", input));
            listener.stopped();
            return;
        }

        listener.addMessage(DcResources.getText("msgFoundXResults", String.valueOf(keys.size())));
        listener.addMessage(DcResources.getText("msgStartParsingXResults", String.valueOf(keys.size())));
        int counter = 0;
       
        for (Object key : keys) {
           
            if (isCancelled() || counter == getMaximum()) break;
           
            try {
                for (DcObject dco : getItems(key, getItemMode() == _ITEM_MODE_FULL)) {
                    dco.setIDs();
                    setServiceInfo(dco);
                   
                    listener.addMessage(DcResources.getText("msgParsingSuccessfull", dco.toString()));
                    listener.addObject(dco);
                    sleep(1000);
                }
                listener.processed(counter);
            } catch (Exception exp) {
                listener.addMessage(DcResources.getText("msgParsingError", "" + exp));
                logger.error(DcResources.getText("msgParsingError", "" + exp), exp);
                listener.processed(counter);
            }
           
            counter++;
        }
       
        listener.processed(counter);
        listener.stopped();       
    }
}
TOP

Related Classes of net.datacrow.core.services.SearchTask

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.