Package org.apache.stanbol.entityhub.indexing.core.config

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig


            NUMBER_OF_ENTITIES_EXPECTED,count),
            NUMBER_OF_ENTITIES_EXPECTED <= count);
    }
    @Test
    public void testEntityDataProvider(){
        IndexingConfig config = new IndexingConfig(CONFIG_ROOT+"provider",CONFIG_ROOT+"provider"){};
        EntityIterator entityIdIterator = config.getEntityIdIterator();
        assertNotNull("Unable to perform test whithout EntityIterator",entityIdIterator);
        if(entityIdIterator.needsInitialisation()){
            entityIdIterator.initialise();
        }
        EntityDataProvider dataProvider = config.getEntityDataProvider();
        assertNotNull(dataProvider);
        assertTrue(dataProvider.needsInitialisation());//there are test data to load
        dataProvider.initialise();
        assertEquals(dataProvider.getClass(), RdfIndexingSource.class);
        long count = 0;
View Full Code Here


    /**
     * Verifies that the no-argument {@link IndexingConfig} constructor fails
     * with an {@link IllegalArgumentException}: in the test setup there is no
     * default configuration.
     */
    @Test(expected=IllegalArgumentException.class)
    public void missingDefault(){
        new IndexingConfig(); //there is no indexing folder in the user.dir
    }
View Full Code Here

    @Test(expected=IllegalArgumentException.class)
    public void missingConfig(){
        //this should create the specified folder and than throw an
        //illegalArgumentException because the indexing.properties file can not
        //be found in the classpath under
        new IndexingConfig(CONFIG_ROOT+"noConfig");
    }
View Full Code Here

     * In this case the config exists in the classpath, but is not valid because
     * the required indexing.properties is missing
     */
    @Test(expected=IllegalArgumentException.class)
    public void missingConfigDir(){
        new IndexingConfig(CONFIG_ROOT+"missingconfig");
    }
View Full Code Here

     * parsing of configuration files
     */
    @Test
    public void loadSimpleConfigDir() throws IOException {
        String name = CONFIG_ROOT+"simple";
        IndexingConfig config = new IndexingConfig(name);
        //assert that this directory exists (is created)
        File expectedRoot = new File(testRoot,name);
        expectedRoot = new File(expectedRoot,"indexing");
        assertTrue("Root Dir not created",expectedRoot.isDirectory());
        assertEquals("Root dir other the expected ",
            expectedRoot.getCanonicalPath(),config.getRootFolder().getCanonicalPath());
        assertTrue(config.getConfigFolder().isDirectory());
        assertTrue(config.getSourceFolder().isDirectory());
        assertTrue(config.getDestinationFolder().isDirectory());
        assertTrue(config.getDistributionFolder().isDirectory());
        //test the name
        assertEquals(config.getName(),"simple");
        assertEquals(config.getDescription(), "Simple Configuration");
        //test if the normaliser configuration was parsed correctly!
        final ScoreNormaliser normaliser = config.getNormaliser();
        //test if the config files where copied form the classpath to the
        //config directory.
        assertTrue("Config File for the RangeNormaliser not copied",
            new File(config.getConfigFolder(),"range.properties").isFile());
        assertTrue("Config File for the MinScoreNormalizer not copied",
            new File(config.getConfigFolder(),"minscore.properties").isFile());
        //now test if the configuration was parsed correctly
        ScoreNormaliser testNormaliser = normaliser;
        assertNotNull(testNormaliser);
        assertEquals(testNormaliser.getClass(), RangeNormaliser.class);
        testNormaliser = testNormaliser.getChained();
        assertNotNull(testNormaliser);
        assertEquals(testNormaliser.getClass(), NaturalLogNormaliser.class);
        testNormaliser = testNormaliser.getChained();
        assertNotNull(testNormaliser);
        assertEquals(testNormaliser.getClass(), MinScoreNormalizer.class);
        EntityIterator entityIterator = config.getEntityIdIterator();
        assertNotNull(entityIterator);
        assertEquals(entityIterator.getClass(), LineBasedEntityIterator.class);
        if(entityIterator.needsInitialisation()){
            entityIterator.initialise();
        }
        Map<String,Float> entityIds = new HashMap<String,Float>();
        //the values test if the normaliser configuration was readed correctly
        //the keys if the configured entiyScore file was configured correctly
        float boost = 10f/(float)Math.log1p(100);
        entityIds.put("http://www.example.org/entity/test", Float.valueOf(10));
        entityIds.put("http://www.example.org/entity/test2", Float.valueOf((float)(Math.log1p(10)*boost)));
        entityIds.put("http://www.example.org/entity/test3", Float.valueOf(-1));
        while(entityIterator.hasNext()){
            EntityIterator.EntityScore entityScore = entityIterator.next();
            Float expectedScore = entityIds.remove(entityScore.id);
            assertNotNull("Entity with ID "+entityScore.id+" not found!",expectedScore);
            Float score = normaliser.normalise(entityScore.score);
            assertTrue("Entity score "+score+" is not the expected "+expectedScore,expectedScore.compareTo(score)==0);
        }
        assertTrue(entityIds.isEmpty());
        List<EntityProcessor> processors = config.getEntityProcessors();
        assertNotNull(processors);
    }
View Full Code Here

    public static void cleanup(){
        // write the value held in userDir back into the "user.dir" system
        // property; NOTE(review): presumably the original working directory
        // saved before the tests changed it - confirm against the setup code
        System.setProperty("user.dir", userDir);
    }
    @Test
    public void testEntityDataIterable(){
        IndexingConfig config = new IndexingConfig(CONFIG_ROOT+"iterable");
        EntityDataIterable iterable = config.getDataInterable();
        assertNotNull(iterable);
        assertEquals(iterable.getClass(), RdfIndexingSource.class);
        assertTrue(iterable.needsInitialisation());
        iterable.initialise();
        EntityDataIterator it = iterable.entityDataIterator();
View Full Code Here

            NUMBER_OF_ENTITIES_EXPECTED,count),
            NUMBER_OF_ENTITIES_EXPECTED <= count);
    }
    @Test
    public void testEntityDataProvider(){
        IndexingConfig config = new IndexingConfig(CONFIG_ROOT+"provider");
        EntityIterator entityIdIterator = config.getEntityIdIterator();
        assertNotNull("Unable to perform test whithout EntityIterator",entityIdIterator);
        if(entityIdIterator.needsInitialisation()){
            entityIdIterator.initialise();
        }
        EntityDataProvider dataProvider = config.getEntityDataProvider();
        assertNotNull(dataProvider);
        assertTrue(dataProvider.needsInitialisation());//there are test data to load
        dataProvider.initialise();
        assertEquals(dataProvider.getClass(), RdfIndexingSource.class);
        long count = 0;
View Full Code Here

        trimEntityId = DEFAULT_TRIM_ENTITY;
        trimLine = DEFAULT_TRIM_LINE;
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        log.info("Configure {} :",getClass().getSimpleName());
        Object value = config.get(PARAM_CHARSET);
        if(value != null && value.toString() != null){
            this.charset = value.toString();
            log.info("Set charset to '{}'",charset);
        }
        //parse encode/decode EntityIDs
        value = config.get(PARAM_URL_ENCODE_ENTITY_IDS);
        boolean encodeIds;
        if(value != null){
            encodeIds = Boolean.parseBoolean(value.toString());
        } else if (config.containsKey(PARAM_URL_ENCODE_ENTITY_IDS)){
            encodeIds = true;
        } else {
            encodeIds = false;
        }
        value = config.get(PARAM_URL_DECODE_ENTITY_IDS);
        boolean decodeIds;
        if(value != null){
            decodeIds = Boolean.parseBoolean(value.toString());
        } else if (config.containsKey(PARAM_URL_DECODE_ENTITY_IDS)){
            decodeIds = true;
        } else {
            decodeIds = false;
        }
        if(encodeIds && decodeIds){
            throw new IllegalArgumentException(String.format(
                "One can not enable both Parameters '{}' and '{}'!",
                PARAM_URL_DECODE_ENTITY_IDS,PARAM_URL_DECODE_ENTITY_IDS));
        } else if(encodeIds){
            this.encodeEntityIds = 1;
            log.info("activate URL encoding of Entity IDs");
        } else if(decodeIds){
            this.encodeEntityIds = -1;
            log.info("activate URL decoding of Entity IDs");
        }
        value = config.get(PARAM_ENTITY_SCORE_FILE);
        if(value == null || value.toString().isEmpty()){
            scoreFile = indexingConfig.getSourceFile(DEFAULT_ENTITY_SCORE_FILE);
        } else {
            scoreFile = indexingConfig.getSourceFile(value.toString());
        }
        log.info("Set Source File to '"+this.scoreFile+"'");
        //now done in the initialise() method
//        try {
//            initReader(new FileInputStream(scoreFile));
View Full Code Here

     * @return The configured Indexer or an Exception when the configuration was
     * not found or is not valid
     */
    public Indexer create(String dir){
        Indexer indexer;
        IndexingConfig config = new IndexingConfig(dir);
        //get the mode based on the configured IndexingComponents
        EntityDataIterable dataIterable = config.getDataInterable();
        EntityIterator idIterator = config.getEntityIdIterator();
        EntityDataProvider dataProvider = config.getEntityDataProvider();
        EntityScoreProvider scoreProvider = config.getEntityScoreProvider();
       
       
        IndexingDestination destination = config.getIndexingDestination();
        if(destination == null){
            log.error("The indexing configuration does not provide an " +
                "indexing destination. This needs to be configured by the key " +
                "'{}' in the indexing.properties within the directory {}",
                IndexingConstants.KEY_INDEXING_DESTINATION,config.getConfigFolder());
            throw new IllegalArgumentException("No IndexingDestination present");
        }
        List<EntityProcessor> processors = config.getEntityProcessors();
        if(processors == null){
            log.error("The indexing configuration does not provide an " +
                "entity processor. This needs to be configured by the key " +
                "'{}' in the indexing.properties within the directory {}",
                IndexingConstants.KEY_ENTITY_PROCESSOR,config.getConfigFolder());
        }
        log.info("Present Source Configuration:");
        log.info(" - EntityDataIterable: {}",dataIterable);
        log.info(" - EntityIterator: {}",idIterator);
        log.info(" - EntityDataProvider: {}",dataProvider);
        log.info(" - EntityScoreProvider: {}",scoreProvider);
        log.info(" - EntityProcessors ({}):",processors.size());
        int i=0;
        for(EntityProcessor processor : processors){
            i++;
            log.info("    {}) {}",i,processor);
        }
        if(dataIterable != null && scoreProvider != null){
            // iterate over data and lookup scores
            indexer = new IndexerImpl(dataIterable, scoreProvider,
                config.getNormaliser(),destination, processors);
        } else if(idIterator != null && dataProvider != null){
            // iterate over id and lookup data
            indexer = new IndexerImpl(idIterator,dataProvider,
                config.getNormaliser(),destination, processors);
        } else if(dataIterable != null && idIterator != null){
            // create an EntityIterator to EntityScoreProvider adapter
            log.info(
                "Create Adapter from the configured EntityIterator '{}' to the " +
                "required EntityScoreProvider as needed together with the " +
              "configured EntityDataIterable '{}'",
              idIterator.getClass(), dataIterable.getClass());
            indexer = new IndexerImpl(dataIterable,
                new EntityIneratorToScoreProviderAdapter(idIterator),
                config.getNormaliser(),destination, processors);
        } else {
            log.error("Invalid Indexing Source configuration: ");
            log.error(" - To iterate over the data and lookup scores one need to " +
                "configure an EntityDataIterable and an EntityScoreProvider ");
            log.error(" - To iterate over the Id and and lookup data one need to " +
View Full Code Here

        this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset), true,true);
        loader.addResource(sourceFileOrDirectory);
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        //first init the RDF Model
        Object value = config.get(PARAM_MODEL_DIRECTORY);
        File modelLocation;
        if(value == null){
            modelLocation = new File(indexingConfig.getSourceFolder(),DEFAULT_MODEL_DIRECTORY);
        } else {
            modelLocation = new File(indexingConfig.getSourceFolder(),value.toString());
        }
        this.indexingDataset = createRdfModel(modelLocation);
        //second we need to check if we need to import RDF files to the RDF model
        //create the ResourceLoader
        this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset), true);
        //check if importing is deactivated
        boolean importSource = true; //default is true
        value = config.get(PARAM_IMPORT_SOURCE);
        if(value != null){
            importSource = Boolean.parseBoolean(value.toString());
        }
        if(importSource){ // if we need to import ... check the source config
            log.info("Importing RDF data from:");
            value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
            if(value == null){ //if not set use the default
                value = DEFAULT_SOURCE_FOLDER_NAME;
            }
            for(String source : value.toString().split(",")){
                File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
                if(sourceFileOrDirectory.exists()){
                    //register the configured source with the ResourceLoader
                    this.loader.addResource(sourceFileOrDirectory);
                } else {
                    if(FilenameUtils.getExtension(source).isEmpty()){
                        //non existent directory -> create
                        //This is typically the case if this method is called to
                        //initialise the default configuration. So we will try
                        //to create the directory users need to copy the source
                        //RDF files.
                        if(!sourceFileOrDirectory.mkdirs()){
                            log.warn("Unable to create directory {} configured to improt RDF data from. " +
                                "You will need to create this directory manually before copying the" +
                                "RDF files into it.",sourceFileOrDirectory);
                            //this would not be necessary because the directory will
                            //be empty - however I like to be consistent and have
                            //all configured and existent files & dirs added the the
                            //resource loader
                            this.loader.addResource(sourceFileOrDirectory);
                        }
                    } else {
                        log.warn("Unable to find RDF source {} within the indexing Source folder ",source,indexingConfig.getSourceFolder());
                    }
                }
            }
            if(log.isInfoEnabled()){
                for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
View Full Code Here

TOP

Related Classes of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.