Package org.apache.stanbol.entityhub.indexing.core.config

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig


       provider.close();
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        //the IndexingConfig is available via the IndexingConfig.KEY_INDEXING_CONFIG key!
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        //configure first the EntityIterator to adapt
        entityIterator = indexingConfig.getEntityIdIterator();
        if(entityIterator == null){
            throw new IllegalArgumentException("No EntityIterator available via the indexing configuration "+indexingConfig.getName());
        }
    }
View Full Code Here


    private boolean includeAll;
   
   
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        NamespacePrefixService nsPrefixService = indexingConfig.getNamespacePrefixService();
        log.info("Configure {}",getClass().getSimpleName());
        Object value = config.get(PARAM_PROPERTY_FILTERS);
        if(value == null){
            propertyPrefixMap = Collections.emptyMap();
            propertyMap = Collections.emptyMap();
            includeAll = true;
        } else {
            log.info(" > property Prefix Filters");
            //ensure that longer prefixes are first
            File propertyPrefixConfig = indexingConfig.getConfigFile(value.toString());
            List<String> lines;
            InputStream in = null;
            try {
                in = new FileInputStream(propertyPrefixConfig);
                lines = IOUtils.readLines(in,"UTF-8");
View Full Code Here

     * @throws IllegalArgumentException if the config is <code>null</code>; is
     * missing a value for the {@link IndexingConfig#KEY_INDEXING_CONFIG} or
     * {@link #initTDBDataset(File)} throws an IllegalArgumentException
     */
    public static DatasetGraphTDB getTDBDataset(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(KEY_INDEXING_CONFIG);
        if(indexingConfig == null){
            throw new IllegalArgumentException("No IndexingConfig object present as value of key '"
                    + KEY_INDEXING_CONFIG+"'!");
        }
        Object value = config.get(PARAM_MODEL_DIRECTORY);
        File modelLocation;
        if(value == null){
            modelLocation = new File(indexingConfig.getSourceFolder(),DEFAULT_MODEL_DIRECTORY);
        } else {
            modelLocation = new File(indexingConfig.getSourceFolder(),value.toString());
        }
        return initTDBDataset(modelLocation);

    }
View Full Code Here

        this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset,importFilter), true,true);
        loader.addResource(sourceFileOrDirectory);
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        //first init the RDF Model
        this.indexingDataset = Utils.getTDBDataset(config);
        //second we need to check if we need to import RDF files to the RDF model
        //look if we need want to use an import filter
        Object value = config.get(PARAM_IMPORT_FILTER);
        if(value == null){
            log.info("No RDF Import Filter configured");
            importFilter = null;
        } else {
            String[] filterNames = value.toString().split(",");
            List<RdfImportFilter> filters = new ArrayList<RdfImportFilter>();
            ClassLoader cl = indexingConfig.getClass().getClassLoader();
            for(String filterName : filterNames){
                filterName = filterName.trim();
                try {
                    Class<? extends RdfImportFilter> importFilterClass = cl.loadClass(
                        filterName).asSubclass(RdfImportFilter.class);
                    RdfImportFilter filter = importFilterClass.newInstance();
                    filter.setConfiguration(config);
                    filters.add(filter);
                    log.info("Use RDF ImportFilter {} (type: {})",importFilter,importFilterClass.getSimpleName());
                } catch (ClassNotFoundException e) {
                    throw new IllegalArgumentException("Configured RdfImportFilter '"
                        +filterName+"' not found", e);
                } catch (InstantiationException e) {
                    throw new IllegalArgumentException("Configured RdfImportFilter '"
                            +filterName+"' can not be instantiated", e);
                } catch (IllegalAccessException e) {
                    throw new IllegalArgumentException("Configured RdfImportFilter '"
                            +filterName+"' can not be created", e);
                }
            }
            if(filters.isEmpty()){
                this.importFilter = null;
            } else if(filters.size() == 1){
                this.importFilter = filters.get(0);
            } else {
                this.importFilter = new UnionImportFilter(filters.toArray(
                    new RdfImportFilter[filters.size()]));
            }
        }
       
        boolean failOnError = indexingConfig.isFailOnError();
        //create the ResourceLoader
        this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset, importFilter), failOnError);
       
        value = config.get(PARAM_IMPORTED_FOLDER);
        String importedFolderName;
        if(value != null && !value.toString().isEmpty()){
            importedFolderName = value.toString();
        } else {
            importedFolderName = DEFAULT_IMPORTED_FOLDER_NAME;
        }
        File importedFolder = new File(indexingConfig.getSourceFolder(),importedFolderName);
        log.info("Imported RDF File Folder: {}",importedFolder);
        this.loader.setImportedDir(importedFolder);
        //check if importing is deactivated
        boolean importSource = true; //default is true
        value = config.get(PARAM_IMPORT_SOURCE);
        if(value != null){
            importSource = Boolean.parseBoolean(value.toString());
        }
        if(importSource){ // if we need to import ... check the source config
            log.info("Importing RDF data from:");
            value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
            if(value == null){ //if not set use the default
                value = DEFAULT_SOURCE_FOLDER_NAME;
            }
            for(String source : value.toString().split(",")){
                File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
                if(sourceFileOrDirectory.exists()){
                    //register the configured source with the ResourceLoader
                    this.loader.addResource(sourceFileOrDirectory);
                } else {
                    if(FilenameUtils.getExtension(source).isEmpty()){
                        //non existent directory -> create
                        //This is typically the case if this method is called to
                        //initialise the default configuration. So we will try
                        //to create the directory users need to copy the source
                        //RDF files.
                        if(!sourceFileOrDirectory.mkdirs()){
                            log.warn("Unable to create directory {} configured to improt RDF data from. " +
                                "You will need to create this directory manually before copying the" +
                                "RDF files into it.",sourceFileOrDirectory);
                            //this would not be necessary because the directory will
                            //be empty - however I like to be consistent and have
                            //all configured and existent files & dirs added the the
                            //resource loader
                            this.loader.addResource(sourceFileOrDirectory);
                        }
                    } else {
                        log.warn("Unable to find RDF source {} within the indexing Source folder ",source,indexingConfig.getSourceFolder());
                    }
                }
            }
            if(log.isInfoEnabled()){
                for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
                    log.info(" > "+registeredSource);
                }
            }
        } else {
            log.info("Importing RDF data deactivated by parameer {}={}"+PARAM_IMPORT_SOURCE,value);
        }
        //STANBOL-765: parsed bnode-prefix from parsed configuration.
        value = config.get(PARAM_BNODE_STATE);
        final Boolean bnodeState;
        if(value != null){
            bnodeState = value instanceof Boolean ? (Boolean) value :
                Boolean.parseBoolean(value.toString());
        } else if(config.containsKey(PARAM_BNODE_STATE)){ //support key without value
            bnodeState = true;
        } else {
            bnodeState = null; //undefined
        }
        if(bnodeState == null || bnodeState){ //null or enabled -> consider prefix
            value = config.get(PARAM_BNODE_PREFIX);
            if(value != null){
                try {
                    new URI(value.toString());
                } catch (URISyntaxException e) {
                    throw new IllegalArgumentException("The configured "+PARAM_BNODE_PREFIX+"='"
                        + value.toString() + "' MUST BE a valid URI!");
                }
                bnodePrefix = value.toString();
            } else if(bnodeState != null) { //use default prefix if bnodeState is true
                bnodePrefix = String.format("urn:bnode:%s:",indexingConfig.getName());
            } // else bnodeState == null and no custom prefix -> disable by default
        }
        if(bnodePrefix != null){
            log.info("Indexing of Bnodes enabled (prefix: {}",bnodePrefix);
        } else {
View Full Code Here

     * @param classpathOffset
     * @return
     */
    protected Indexer create(String dir,String classpathOffset){
        Indexer indexer;
        IndexingConfig config;
        if(classpathOffset != null){
            config= new IndexingConfig(dir,classpathOffset){};
        } else {
            config= new IndexingConfig(dir);
        }
        //get the mode based on the configured IndexingComponents
        EntityDataIterable dataIterable = config.getDataIterable();
        EntityIterator idIterator = config.getEntityIdIterator();
        EntityDataProvider dataProvider = config.getEntityDataProvider();
        EntityScoreProvider scoreProvider = config.getEntityScoreProvider();
       
       
        IndexingDestination destination = config.getIndexingDestination();
        if(destination == null){
            log.error("The indexing configuration does not provide an " +
                "indexing destination. This needs to be configured by the key " +
                "'{}' in the indexing.properties within the directory {}",
                IndexingConstants.KEY_INDEXING_DESTINATION,config.getConfigFolder());
            throw new IllegalArgumentException("No IndexingDestination present");
        }
        List<EntityProcessor> processors = config.getEntityProcessors();
        if(processors == null){
            log.error("The indexing configuration does not provide an " +
                "entity processor. This needs to be configured by the key " +
                "'{}' in the indexing.properties within the directory {}",
                IndexingConstants.KEY_ENTITY_PROCESSOR,config.getConfigFolder());
        }
        List<EntityProcessor> postProcessors = config.getEntityPostProcessors();
        log.info("Present Source Configuration:");
        log.info(" - EntityDataIterable: {}",dataIterable);
        log.info(" - EntityIterator: {}",idIterator);
        log.info(" - EntityDataProvider: {}",dataProvider);
        log.info(" - EntityScoreProvider: {}",scoreProvider);
        log.info(" - EntityProcessors ({}):",processors.size());
        if(postProcessors != null){
            log.info(" - EntityPostProcessors ({}):",postProcessors.size());
        }
        int i=0;
        for(EntityProcessor processor : processors){
            i++;
            log.info("    {}) {}",i,processor);
        }
        if(dataIterable != null && scoreProvider != null){
            // iterate over data and lookup scores
            indexer = new IndexerImpl(dataIterable, scoreProvider,
                config.getNormaliser(),destination, processors,
                config.getIndexedEntitiesIdsFile(),postProcessors);
        } else if(idIterator != null && dataProvider != null){
            // iterate over id and lookup data
            indexer = new IndexerImpl(idIterator,dataProvider,
                config.getNormaliser(),destination, processors,
                config.getIndexedEntitiesIdsFile(),postProcessors);
        } else if(dataIterable != null && idIterator != null){
            // create an EntityIterator to EntityScoreProvider adapter
            log.info(
                "Create Adapter from the configured EntityIterator '{}' to the " +
                "required EntityScoreProvider as needed together with the " +
              "configured EntityDataIterable '{}'",
              idIterator.getClass(), dataIterable.getClass());
            indexer = new IndexerImpl(dataIterable,
                new EntityIneratorToScoreProviderAdapter(idIterator),
                config.getNormaliser(),destination, processors,
                config.getIndexedEntitiesIdsFile(),postProcessors);
        } else {
            log.error("Invalid Indexing Source configuration: ");
            log.error(" - To iterate over the data and lookup scores one need to " +
                "configure an EntityDataIterable and an EntityScoreProvider ");
            log.error(" - To iterate over the Id and and lookup data one need to " +
View Full Code Here

    public static void cleanup(){
        System.setProperty("user.dir", userDir);
    }
    @Test
    public void testEntityDataIterable(){
        IndexingConfig config = new IndexingConfig(CONFIG_ROOT+"iterable",CONFIG_ROOT+"iterable"){};
        EntityDataIterable iterable = config.getDataIterable();
        assertNotNull(iterable);
        assertEquals(iterable.getClass(), RdfIndexingSource.class);
        assertTrue(iterable.needsInitialisation());
        iterable.initialise();
        EntityDataIterator it = iterable.entityDataIterator();
View Full Code Here

            NUMBER_OF_ENTITIES_EXPECTED,count),
            NUMBER_OF_ENTITIES_EXPECTED <= count);
    }
    @Test
    public void testEntityDataProvider(){
        IndexingConfig config = new IndexingConfig(CONFIG_ROOT+"provider",CONFIG_ROOT+"provider"){};
        EntityIterator entityIdIterator = config.getEntityIdIterator();
        assertNotNull("Unable to perform test whithout EntityIterator",entityIdIterator);
        if(entityIdIterator.needsInitialisation()){
            entityIdIterator.initialise();
        }
        EntityDataProvider dataProvider = config.getEntityDataProvider();
        assertNotNull(dataProvider);
        assertTrue(dataProvider.needsInitialisation());//there are test data to load
        dataProvider.initialise();
        assertEquals(dataProvider.getClass(), RdfIndexingSource.class);
        long count = 0;
View Full Code Here

    /**
     * Tests support for Quads (STANBOL-764)
     */
    @Test
    public void testQuadsImport(){
        IndexingConfig config = new IndexingConfig(CONFIG_ROOT+"quads",CONFIG_ROOT+"quads"){};
        EntityIterator entityIdIterator = config.getEntityIdIterator();
        assertNotNull("Unable to perform test whithout EntityIterator",entityIdIterator);
        if(entityIdIterator.needsInitialisation()){
            entityIdIterator.initialise();
        }
        EntityDataProvider dataProvider = config.getEntityDataProvider();
        assertNotNull(dataProvider);
        assertTrue(dataProvider.needsInitialisation());//there are test data to load
        dataProvider.initialise();
        assertEquals(dataProvider.getClass(), RdfIndexingSource.class);
        long count = 0;
View Full Code Here

            9, count),
            9, count);
    }
    @Test
    public void testBNodeSupport(){
        IndexingConfig config = new IndexingConfig(CONFIG_ROOT+"bnode",CONFIG_ROOT+"bnode"){};
        EntityDataIterable iterable = config.getDataIterable();
        assertNotNull(iterable);
        assertEquals(iterable.getClass(), RdfIndexingSource.class);
        assertTrue(iterable.needsInitialisation());
        iterable.initialise();
        ((RdfIndexingSource)iterable).debug();
View Full Code Here

        return false;
    }

    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        nsPrefixService = indexingConfig.getNamespacePrefixService();
        Object value = config.get(PARAM_FIELD);
        if(value == null || value.toString().isEmpty()){
            this.field = NamespaceMappingUtils.getConfiguredUri(nsPrefixService, DEFAULT_FIELD);
            log.info("Using default Field {}",field);
        } else {
View Full Code Here

TOP

Related Classes of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.