Package org.apache.stanbol.entityhub.indexing.core

Source Code of org.apache.stanbol.entityhub.indexing.core.IndexerFactory

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.indexing.core;

import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig.DEFAULT_INDEXED_ENTITIES_ID_FILE_NAME;

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants;
import org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl;
import org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser;
import org.apache.stanbol.entityhub.indexing.core.source.EntityIneratorToScoreProviderAdapter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Factory used to create {@link Indexer} instances
* @author Rupert Westenthaler
*
*/
public class IndexerFactory {
    private static final Logger log = LoggerFactory.getLogger(IndexerFactory.class);
    /**
     * singleton instance
     */
    private static IndexerFactory instance = null;
   
    /**
     * Singleton constructor
     */
    private IndexerFactory(){
        //Singleton pattern
    }
    /**
     * Getter for the singleton of this factory
     * @return the singleton
     */
    public static IndexerFactory getInstance(){
        if(instance == null){
            instance = new IndexerFactory();
        }
        return instance;
    }
    /**
     * Creates an {@link Indexer} instance based on the configuration
     * relative to the working directory.<p>
     * The configuration is expected within "{workingdir}/indexing"
     * @return The configured Indexer or an Exception when the configuration was
     * not found or is not valid
     */
    public Indexer create() {
        return create(null);
    }
    /**
     * Creates an {@link Indexer} instance based on the configuration. The
     * configuration is expected within the "/indexing" directory of the parsed
     * location.<p>
     * In case a relative path is parsed the current working directory is used
     * as context. That means that the configuration is expected within
     * folder "{workingDir}/{parsedPath}/indexing". For absolute paths the
     * configuration is expected at "{parsedPath}/indexing".
     * @return The configured Indexer or an Exception when the configuration was
     * not found or is not valid
     */
    public Indexer create(String dir){
        return create(dir, null);
    }
    /**
     * Internally used for unit testing. Allows to parse an offset for loading
     * the indexer configuration from the classpath. Currently a protected
     * feature, but might be moved to the public API at a later point of time.
     * (would allow to include multiple default configurations via the
     * classpath).
     * @param dir
     * @param classpathOffset
     * @return
     */
    protected Indexer create(String dir,String classpathOffset){
        Indexer indexer;
        IndexingConfig config;
        if(classpathOffset != null){
            config= new IndexingConfig(dir,classpathOffset){};
        } else {
            config= new IndexingConfig(dir);
        }
        //get the mode based on the configured IndexingComponents
        EntityDataIterable dataIterable = config.getDataIterable();
        EntityIterator idIterator = config.getEntityIdIterator();
        EntityDataProvider dataProvider = config.getEntityDataProvider();
        EntityScoreProvider scoreProvider = config.getEntityScoreProvider();
       
       
        IndexingDestination destination = config.getIndexingDestination();
        if(destination == null){
            log.error("The indexing configuration does not provide an " +
                "indexing destination. This needs to be configured by the key " +
                "'{}' in the indexing.properties within the directory {}",
                IndexingConstants.KEY_INDEXING_DESTINATION,config.getConfigFolder());
            throw new IllegalArgumentException("No IndexingDestination present");
        }
        List<EntityProcessor> processors = config.getEntityProcessors();
        if(processors == null){
            log.error("The indexing configuration does not provide an " +
                "entity processor. This needs to be configured by the key " +
                "'{}' in the indexing.properties within the directory {}",
                IndexingConstants.KEY_ENTITY_PROCESSOR,config.getConfigFolder());
        }
        List<EntityProcessor> postProcessors = config.getEntityPostProcessors();
        log.info("Present Source Configuration:");
        log.info(" - EntityDataIterable: {}",dataIterable);
        log.info(" - EntityIterator: {}",idIterator);
        log.info(" - EntityDataProvider: {}",dataProvider);
        log.info(" - EntityScoreProvider: {}",scoreProvider);
        log.info(" - EntityProcessors ({}):",processors.size());
        if(postProcessors != null){
            log.info(" - EntityPostProcessors ({}):",postProcessors.size());
        }
        int i=0;
        for(EntityProcessor processor : processors){
            i++;
            log.info("    {}) {}",i,processor);
        }
        if(dataIterable != null && scoreProvider != null){
            // iterate over data and lookup scores
            indexer = new IndexerImpl(dataIterable, scoreProvider,
                config.getNormaliser(),destination, processors,
                config.getIndexedEntitiesIdsFile(),postProcessors);
        } else if(idIterator != null && dataProvider != null){
            // iterate over id and lookup data
            indexer = new IndexerImpl(idIterator,dataProvider,
                config.getNormaliser(),destination, processors,
                config.getIndexedEntitiesIdsFile(),postProcessors);
        } else if(dataIterable != null && idIterator != null){
            // create an EntityIterator to EntityScoreProvider adapter
            log.info(
                "Create Adapter from the configured EntityIterator '{}' to the " +
                "required EntityScoreProvider as needed together with the " +
              "configured EntityDataIterable '{}'",
              idIterator.getClass(), dataIterable.getClass());
            indexer = new IndexerImpl(dataIterable,
                new EntityIneratorToScoreProviderAdapter(idIterator),
                config.getNormaliser(),destination, processors,
                config.getIndexedEntitiesIdsFile(),postProcessors);
        } else {
            log.error("Invalid Indexing Source configuration: ");
            log.error(" - To iterate over the data and lookup scores one need to " +
                "configure an EntityDataIterable and an EntityScoreProvider ");
            log.error(" - To iterate over the Id and and lookup data one need to " +
            "configure an EntityIterator and an EntityDataProvider");
            throw new IllegalArgumentException("Invalid Indexing Source configuration");
        }
        return indexer;
    }

    public Indexer create(EntityIterator idIterator, EntityDataProvider dataProvider,
                          ScoreNormaliser normaliser,
                          List<EntityProcessor> processors, IndexingDestination destination){
        return new IndexerImpl(idIterator, dataProvider, normaliser,destination, processors,null,null);
    }
    public Indexer create(EntityIterator idIterator, EntityDataProvider dataProvider,
                          ScoreNormaliser normaliser,
                          List<EntityProcessor> processors, List<EntityProcessor> postProcessors,
                          IndexingDestination destination){
        File tmp;
        try {
            tmp = File.createTempFile("ind-ent-ids","zip");
            tmp.deleteOnExit();
        } catch (IOException e) {
            throw new IllegalStateException("Unable to create temporary file for storing the" +
                    "indexed Entity IDs",e);
        }
        return new IndexerImpl(idIterator, dataProvider, normaliser,destination, processors,
            tmp,postProcessors);
    }
   
    public Indexer create(EntityDataIterable dataIterable,EntityScoreProvider scoreProvider,
                          ScoreNormaliser normaliser,
                          List<EntityProcessor> processors, IndexingDestination destination){
        return new IndexerImpl(dataIterable, scoreProvider, normaliser,destination, processors,null,null);
    }
    public Indexer create(EntityDataIterable dataIterable,EntityScoreProvider scoreProvider,
                          ScoreNormaliser normaliser,
                          List<EntityProcessor> processors,  List<EntityProcessor> postProcessors,
                          IndexingDestination destination){
        File tmp;
        try {
            tmp = File.createTempFile("ind-ent-ids","zip");
            tmp.deleteOnExit();
        } catch (IOException e) {
            throw new IllegalStateException("Unable to create temporary file for storing the" +
                    "indexed Entity IDs",e);
        }
        return new IndexerImpl(dataIterable, scoreProvider, normaliser,destination, processors,
            tmp,postProcessors);
    }
}
TOP

Related Classes of org.apache.stanbol.entityhub.indexing.core.IndexerFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.