Package org.apache.stanbol.entityhub.indexing.source.vcard

Source Code of org.apache.stanbol.entityhub.indexing.source.vcard.VcardIndexingSource$VCardIterator

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.indexing.source.vcard;

import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_COUNTRY;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_EXTENDED;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_LOCALITY;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_POSTAL_CODE;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_POST_OFFICE_ADDRESS;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_REGION;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_STREET;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.N_ADDITIONAL;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.N_FAMILY;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.N_GIVEN;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.N_PREFIX;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.N_SUFFIX;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.RDF_TYPE;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.VCARD_ORGANIZATION;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.VCARD_PERSON;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;

import net.fortuna.ical4j.data.ParserException;
import net.fortuna.ical4j.vcard.Parameter;
import net.fortuna.ical4j.vcard.Property;
import net.fortuna.ical4j.vcard.VCard;
import net.fortuna.ical4j.vcard.VCardBuilder;
import net.fortuna.ical4j.vcard.VCardFileFilter;
import net.fortuna.ical4j.vcard.property.Address;
import net.fortuna.ical4j.vcard.property.N;
import net.fortuna.ical4j.vcard.property.Org;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceImporter;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
import org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.Mapping;
import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
import org.apache.stanbol.entityhub.servicesapi.util.ModelUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.emory.mathcs.backport.java.util.Arrays;
import edu.emory.mathcs.backport.java.util.Collections;

public class VcardIndexingSource implements EntityDataIterable, ResourceImporter {
   
    /**
     * Logger of this class.
     */
    protected static Logger log = LoggerFactory.getLogger(VcardIndexingSource.class);

    /**
     * The prefix used to create Entities. Read from the {@link #PARAM_PREFIX}
     * parameter by {@link #setConfiguration(Map)}, which may append a tailing
     * ':' (for "urn:" prefixes) or '/' (for prefixes containing "://").
     */
    private String prefix;

    /**
     * Separator char selected by {@link #setConfiguration(Map)} based on the
     * last char of the {@link #prefix}: '.' for prefixes ending with '#',
     * '/' for prefixes ending with '/' and ':' for prefixes ending with ':'
     * (or "urn:" prefixes). The default is '/'.
     */
    private char typeSeperatorChar = '/';
   
    /**
     * The loader the configured source files/folders are registered with
     * (see {@link #setConfiguration(Map)}) and that is asked to load those
     * resources in {@link #initialise()}.
     */
    private ResourceLoader loader;
    /**
     * The charset used to read the vcard file(s) in the source folder.
     * Set by {@link #setConfiguration(Map)} to the charset configured by
     * {@link #PARAM_CHARSET} or to the platform default charset if this
     * parameter is missing or empty.
     */
    private Charset charset = null;
    /**
     * The default Charset ("utf-8"). This is also used to write the vcard files
     * within the destination directory.
     */
    public static final Charset DEFAULT_CHARSET = Charset.forName("UTF8");
    /**
     * Parameter that allows users to define the encoding of the vcard files
     * to import. If this parameter is missing or empty the default charset of
     * the platform is used to read the source files. Independent of this
     * setting the copies created within the destination directory are written
     * by using the {@link #DEFAULT_CHARSET} ("utf-8").
     */
    public static final String PARAM_CHARSET = "encoding";
    /**
     * The Parameter used to configure the source file(s) and/or folder(s)
     * relative to the {@link IndexingConfig#getSourceFolder()}. The ','
     * (comma) is used as separator to configure multiple sources.
     */
    public static final String PARAM_SOURCE_FILE_OR_FOLDER = "source";
    /**
     * The default directory name used to search for vcard files to be imported.
     * Used if {@link #PARAM_SOURCE_FILE_OR_FOLDER} is not configured.
     */
    public static final String DEFAULT_SOURCE_FOLDER_NAME = "vcard";
    /**
     * The (required) parameter used to configure the prefix used for the
     * vCard entities.
     */
    public static final String PARAM_PREFIX = "prefix";

    /**
     * Used to import vcard files from the
     * {@link IndexingConfig#getSourceFolder() source}/
     * {@link #PARAM_SOURCE_FILE_OR_FOLDER vcard} folder.
     * NOTE(review): within the visible code this field is only reset to
     * <code>null</code> by {@link #close()}; the {@link #loader} is created
     * with this instance itself - confirm whether this field is still needed.
     */
    protected ResourceImporter importer;
    /**
     * Folder ("vcard") within the destination directory used to temporarily
     * copy all the vCard files to import. (Re-)created by
     * {@link #setConfiguration(Map)}.
     */
    private File vcardFileImportFolder;
    /**
     * List of the files that need to be imported. Initialised in {@link #initialise()}
     */
    @SuppressWarnings("unchecked")
    private List<File> vcardFiles = Collections.emptyList();
    /**
     * Used to create {@link Representation} instances
     */
    private ValueFactory vf = InMemoryValueFactory.getInstance();
    /**
     * The vcard -&gt; ontology mappings
     * TODO make configurable as soon as there are multiple mappings available
     */
    private Map<String,Mapping> mappings = OntologyMappings.schemaOrgMappings;
   
    public VcardIndexingSource() {
        //set relaxed parsing to TRUE
        System.setProperty("ical4j.parsing.relaxed", Boolean.TRUE.toString());
    }
    @Override
    public EntityDataIterator entityDataIterator() {
        return new VCardIterator();
    }

    @Override
    public void close() {
        this.importer = null;
    }

    @Override
    public boolean needsInitialisation() {
        //if there are resources with the state REGISTERED we need an initialisation
        return !loader.getResources(ResourceState.REGISTERED).isEmpty();
    }
    @Override
    public void initialise(){
        //this will call #importResource(..) for all files in the directories
        //configured by the #PARAM_SOURCE_FILE_OR_FOLDER
        loader.loadResources();
        //create the lists
        vcardFiles = Arrays.asList(vcardFileImportFolder.listFiles(
            (FilenameFilter)VCardFileFilter.INSTANCE));
    }

    @Override
    public void setConfiguration(Map<String,Object> config) {
        //init fields
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        loader = new ResourceLoader(this, true, false);
        //vcard files are imported from a special folder in the destination dir.
        //this folder needs to be deleted/(re-)created first.
        vcardFileImportFolder = new File(indexingConfig.getDestinationFolder(),"vcard");
        if(vcardFileImportFolder.exists()){
            if(vcardFileImportFolder.isDirectory()){
                try {
                    FileUtils.deleteDirectory(vcardFileImportFolder);
                }catch (IOException e){
                    throw new IllegalStateException("Unable to delete Folder "+
                        vcardFileImportFolder.getAbsolutePath()+" containing the vCard files from a" +
                            "previouse indexing! Please remove this folder manually.",e);
                }
            } else if(!vcardFileImportFolder.delete()){
                throw new IllegalStateException("Unable to delete File "+
                    vcardFileImportFolder.getAbsolutePath()+" containing the vCard data from a" +
                            "previouse indexing! Please remove this File manually.");
            }
        }
        if(!vcardFileImportFolder.mkdirs()){
            throw new IllegalStateException("Unable to delete Folder "+
                vcardFileImportFolder.getAbsolutePath()+" containing the vCard files from a" +
                        "previouse indexing! Please remove this folder manually.");
        }
        //load config
        Object value;
        log.debug("load vcard resources from :");
        value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
        if(value == null){ //if not set use the default
            value = DEFAULT_SOURCE_FOLDER_NAME;
        }
        for(String source : value.toString().split(",")){
            File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
            if(sourceFileOrDirectory.exists()){
                //register the configured source with the ResourceLoader
                this.loader.addResource(sourceFileOrDirectory);
            } else {
                if(FilenameUtils.getExtension(source).isEmpty()){
                    //non existent directory -> create
                    //This is typically the case if this method is called to
                    //initialise the default configuration. So we will try
                    //to create the directory users need to copy the source
                    //RDF files.
                    if(!sourceFileOrDirectory.mkdirs()){
                        log.warn("Unable to create directory {} configured to improt source data from. " +
                                "You will need to create this directory manually before copying the" +
                                "Source files into it.",sourceFileOrDirectory);
                        //this would not be necessary because the directory will
                        //be empty - however I like to be consistent and have
                        //all configured and existent files & dirs added the the
                        //resource loader
                        this.loader.addResource(sourceFileOrDirectory);
                    }
                } else {
                    log.warn("Unable to find vcard source {} within the indexing Source folder ",source,indexingConfig.getSourceFolder());
                }
            }
        }
        if(log.isDebugEnabled()){
            for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
                log.debug(" > "+registeredSource);
            }
        }
        //parse the encoding
        value = config.get(PARAM_CHARSET);
        if(value != null){
            String encoding = value.toString();
            if(encoding.isEmpty()){ //use plattform encoding if empty
                charset = Charset.defaultCharset();
            } else {
                try {
                    charset = Charset.forName(encoding);
                } catch (RuntimeException e) {
                    throw new IllegalStateException("The configured encoding '"+
                        encoding+"' is not supported by this Plattform", e);
                }
            }
        } else { //use plattorm encoding if missing
            charset = Charset.defaultCharset();
        }
        //parse the prefix
        value = config.get(PARAM_PREFIX);
        if(value == null || value.toString().isEmpty()){
            throw new IllegalStateException("Teh configuration is missing the required parameter 'prefix'!");
        } else {
            prefix = value.toString();
            //set the typeSeperatorChar based on the kind of parsed prefix
            if(prefix.endsWith("#")){
                typeSeperatorChar = '.';
            } else if (prefix.endsWith("/")){
                typeSeperatorChar = '/';
            } else if (prefix.endsWith(":")){
                typeSeperatorChar = ':';
            } else if (prefix.startsWith("urn:")){ //maybe an urn without an tailing ':'
                prefix = prefix+':';
                typeSeperatorChar = ':';
            } else if (prefix.indexOf("://")>0){ //maybe an url without an tailing '/' or '#'
                prefix = prefix+'/';
            } //else ... no idea what kind of prefix ... use the default '/'
        }
    }

    /**
     * This only copies vCard files to the {@link #vcardFileImportFolder} within the
     * {@link IndexingConfig#getDestinationFolder()}.<p>
     * In addition if a specific {@link #charset} is configured for the
     * vcard files to import this also changes the encoding to the
     * {@link #DEFAULT_CHARSET} (utf-8). This can help users to investigate and
     * correct file encoding related issues.
     * @see org.apache.stanbol.entityhub.indexing.core.source.ResourceImporter#importResource(java.io.InputStream, java.lang.String)
     */
    @Override
    public ResourceState importResource(InputStream is, String resourceName) throws IOException {
        //only copies the file to tmp files in the
        if(resourceName.charAt(0) != '.' && VCardFileFilter.INSTANCE.accept(new File(resourceName))){
            //copy the file to the destination directory
            //1. get the file name used in the destination
            String name = FilenameUtils.getName(resourceName);
            String baseName = FilenameUtils.getBaseName(name);
            String extension = FilenameUtils.getExtension(name);
            File outFile = new File(vcardFileImportFolder,name);
            for(int i = 0;outFile.exists();i++){
                outFile = new File(vcardFileImportFolder,
                    String.format("%s_%s.%s",baseName,i,extension));
            }
            //check the encoding to ensure that in the destination all files use
            // DEFAULT_CHARSET (utf-8)
            if(charset == null || charset.equals(DEFAULT_CHARSET)){
                // no recoding -> copy bytes
                OutputStream os = new FileOutputStream(outFile);
                IOUtils.copy(is, os);
                IOUtils.closeQuietly(os);
                IOUtils.closeQuietly(is);
            } else { //recode
                Reader r = new InputStreamReader(is, charset);
                Writer w = new OutputStreamWriter(new FileOutputStream(outFile), DEFAULT_CHARSET);
                IOUtils.copy(r, w);
                IOUtils.closeQuietly(r);
                IOUtils.closeQuietly(w);
            }
            return ResourceState.LOADED;
        } else {
            log.debug("Resource {} ignored: Not an Vcard file.",resourceName);
            return ResourceState.IGNORED;
        }
    }
   
    private class VCardIterator implements EntityDataIterator {
        Map<EntityType,Map<String,Set<String>>> entityMap;
        Iterator<File> files = vcardFiles.iterator();
        @SuppressWarnings("unchecked")
        Iterator<VCard> vcards = Collections.emptyList().iterator();
        @SuppressWarnings("unchecked")
        Iterator<Representation> representations = Collections.emptyList().iterator();
        Representation nextRepresentation = null;
        Representation currentRepresentation = null;

        private VCardIterator(){
            entityMap = new EnumMap<EntityType,Map<String,Set<String>>>(EntityType.class);
            entityMap.put(EntityType.organization, new HashMap<String,Set<String>>());
            entityMap.put(EntityType.person,  new HashMap<String,Set<String>>());
        }
        /**
         * Parses all {@link VCard} object of the next {@link #files file};
         */
        private Iterator<VCard> parseNext(File file){
            Reader r;
            try {
                r = new InputStreamReader(new FileInputStream(file), DEFAULT_CHARSET);
            } catch (FileNotFoundException e) {
                throw new IllegalStateException("vcard import file "+file+
                    "not found - maybe deleted during import?",e);
            }
            VCardBuilder parser = new VCardBuilder(r);
            try {
                return parser.buildAll().iterator();
            } catch (IOException e) {
                throw new IllegalStateException("Unable to read vcard file "+file,e);
            } catch (ParserException e) {
                throw new IllegalStateException("Unable to parse vcard file "+file,e);
            }
        }

        @Override
        public Representation getRepresentation() {
            return currentRepresentation;
        }

        @Override
        public boolean hasNext() {
            //Iterate while there are still representations, vCards or files
            while(nextRepresentation == null && (representations.hasNext() ||
                    vcards.hasNext() || files.hasNext())) {
                if(representations.hasNext()){ //if more representations
                    nextRepresentation = representations.next(); //set next
                } else { //else process the next vCard object
                    VCard nextVcard = null;
                    //Iterate while there are still more vCards or files
                    while(nextVcard == null && (vcards.hasNext() || files.hasNext())){
                        if(vcards.hasNext()){ //if there are more vCards               
                            nextVcard = vcards.next(); //get next
                        } else { //parse the next file
                            //NOTE: we do not need to check for file.hasNext,
                            //because this was already implicitly checked by the
                            //outer most while loop
                            vcards = parseNext(files.next());
                        }
                    }
                    if(nextVcard != null){
                        representations = processVcard(nextVcard,mappings,entityMap);
                    }
                }
            }
            return nextRepresentation != null;
        }

        @Override
        public String next() {
            if(nextRepresentation == null &&
                    !hasNext()){ //try to get the next
                throw new NoSuchElementException();
            }
            currentRepresentation = nextRepresentation;
            nextRepresentation = null;
            return currentRepresentation.getId();
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("removal is not supported");
           
        }
        @SuppressWarnings("unchecked")
        @Override
        public void close() {
            //set to empty iterators instead of null. Otherwise I would need
            //to check for null in all the other methods
            files = Collections.emptyList().iterator();
            representations = Collections.emptyList().iterator();
            vcards = Collections.emptyList().iterator();
            nextRepresentation = null;
            currentRepresentation = null;
        }
       
    }
    /**
     * Vcard objects can represent persons (FN is defined) or organisations
     * (no 'FN' but an 'ORG' element)
     * @author Rupert Westenthaler
     *
     */
    private enum EntityType {person,organization}
    /**
     * Converts a vCard object to Representations.
     * @param vCard the vCard object to process
     * @param mappings the Mappings to use
     * @param entityMap the Map holding the ids of already processed vCards. This
     * is used to avoid id conflicts
     * @return Iterator over the processed Representation
     */
    protected Iterator<Representation> processVcard(VCard vCard,Map<String,Mapping> mappings,
        Map<EntityType,Map<String,Set<String>>> entityMap){
        //NOTE: this is protected to allow direct access from the VCardIterator
        String name = null;
        EntityType entityType = null;
        Property nameProperty = vCard.getProperty(Property.Id.FN);
        if(nameProperty != null && nameProperty.getValue() != null && !nameProperty.getValue().isEmpty()){
            entityType = EntityType.person;
            name = nameProperty.getValue();
        } else { //FN name -> maybe a ORG was exported
            Property orgProperty = vCard.getProperty(Property.Id.ORG);
            if(orgProperty != null && ((Org)orgProperty).getValues() != null && ((Org)orgProperty).getValues().length>0){
                entityType = EntityType.organization;
                name = ((Org)orgProperty).getValues()[0];
            }
        }
        if(entityType == null){
            log.warn("Unable to index vCard object without values for FN or ORG parameter (vCard: {})",vCard);
            return Collections.emptyList().iterator();
        }
        String id = null;
        Property uid = vCard.getProperty(Property.Id.UID);
        if(uid != null){
            id = uid.getValue();
        } else {
            id = name;
        }
        id = entityByName(entityMap, entityType, name, id,true);
       
        //we have a name and an id (local name of the URI/URN)
        // ... now parse the vCard
        Representation rep = vf .createRepresentation(
            id);
        Map<String,Representation> representations = new HashMap<String,Representation>();
        representations.put(rep.getId(), rep);
        //add the type
        Mapping typeMapping = mappings.get(
            entityType == EntityType.person ? VCARD_PERSON : VCARD_ORGANIZATION);
        if(typeMapping != null){
            rep.add(NamespaceEnum.rdf+"type", typeMapping.uri);
        }
        log.debug("vCard [type: {} | name: '{}' | id: '{}']",
            new Object[]{entityType,name,rep.getId()});
        for(Property property : vCard.getProperties()){
            Property.Id propertyId = property.getId();
            String propName = propertyId.getPropertyName();
            if(mappings.containsKey(propName)){ //there is a mapping for this property
              //the Representation to write the Information of the current Property
                Representation current;
                //the Map with the mappings to be used for processing the current
                //Property
                Map<String,Mapping> currentMappings;
                Mapping mapping = mappings.get(propName); //May be null!!
                if(mapping == null || mapping.subMappings == null){
                    current = rep; //add to the base Representation
                    currentMappings = mappings; //and use the parsed mappings
                } else {
                    current = null; //indicates we need to create a new Representation
                    currentMappings = mapping.subMappings; //and use the sub mappings
                }
                switch (propertyId) {
                    case N:
                        N n = (N)property;
                        String given = n.getGivenName();
                        String family = n.getFamilyName();
                        if((given == null || given.isEmpty()) && (family == null
                                || family.isEmpty())){
                            log.warn("'N' property '{}'does not define given nor family name -> ignored",
                                n.getValue());
                        } else {
                            if(current == null){ //create new Representation
                                current = createSubRepresentation(rep, ".name",
                                    representations.keySet(), mapping);
                                representations.put(current.getId(), current);
                            }
                            Mapping subPropertyMapping = currentMappings.get(N_GIVEN);
                            if(subPropertyMapping != null && given != null && !given.isEmpty()){
                                current.addNaturalText(subPropertyMapping.uri, StringUtils.chomp(given).trim());
                            }
                            subPropertyMapping = currentMappings.get(N_FAMILY);
                            if(subPropertyMapping != null & family != null && !family.isEmpty()){
                                current.addNaturalText(subPropertyMapping.uri, StringUtils.chomp(family).trim());
                            }
                            String[] additional = n.getAdditionalNames();
                            subPropertyMapping = currentMappings.get(N_ADDITIONAL);
                            if(subPropertyMapping != null & additional != null && additional.length>0){
                                for(String value : additional){
                                    if(value != null && !value.isEmpty()){
                                        current.addNaturalText(subPropertyMapping.uri, StringUtils.chomp(value).trim());
                                    }
                                }
                            }
                            String[] prefixes = n.getPrefixes();
                            subPropertyMapping = currentMappings.get(N_PREFIX);
                            if(subPropertyMapping != null & prefixes != null && prefixes.length>0){
                                for(String value : prefixes){
                                    if(value != null && !value.isEmpty()){
                                        current.addNaturalText(subPropertyMapping.uri, StringUtils.chomp(value).trim());
                                    }
                                }
                            }
                            String[] suffixes = n.getSuffixes();
                            subPropertyMapping = currentMappings.get(N_SUFFIX);
                            if(subPropertyMapping != null & suffixes != null && suffixes.length>0){
                                for(String value : suffixes){
                                    if(value != null && !value.isEmpty()){
                                        current.addNaturalText(subPropertyMapping.uri, StringUtils.chomp(value).trim());
                                    }
                                }
                            }
                        }
                        break;
                    case ADR:
                        Address address = (Address)property;
                        if(address.getValue() != null &&
                                //check of the value does not only contain seperators (',')
                                !address.getValue().replace(';', ' ').trim().isEmpty()){
                            if(current == null){ //create new Representation
                                current = createSubRepresentation(rep, ".adr",
                                    representations.keySet(), mapping);
                                representations.put(current.getId(), current);
                            }
                            Mapping subPropertyMapping = currentMappings.get(ADR_POST_OFFICE_ADDRESS);
                            String value = address.getPoBox();
                            if(subPropertyMapping != null && value != null && !value.isEmpty()){
                                //add string -> this is no natural language text
                                current.add(subPropertyMapping.uri, StringUtils.chomp(value).trim());
                            }
                            value = address.getExtended();
                            subPropertyMapping = currentMappings.get(ADR_EXTENDED);
                            if(subPropertyMapping != null && value != null && !value.isEmpty()){
                                current.addNaturalText(subPropertyMapping.uri, StringUtils.chomp(value).trim());
                            }
                            value = address.getStreet();
                            subPropertyMapping = currentMappings.get(ADR_STREET);
                            if(subPropertyMapping != null && value != null && !value.isEmpty()){
                                current.addNaturalText(subPropertyMapping.uri, StringUtils.chomp(value).trim());
                            }
                            value = address.getLocality();
                            subPropertyMapping = currentMappings.get(ADR_LOCALITY);
                            if(subPropertyMapping != null && value != null && !value.isEmpty()){
                                current.addNaturalText(subPropertyMapping.uri, StringUtils.chomp(value).trim());
                            }
                            value = address.getRegion();
                            subPropertyMapping = currentMappings.get(ADR_REGION);
                            if(subPropertyMapping != null && value != null && !value.isEmpty()){
                                current.addNaturalText(subPropertyMapping.uri, StringUtils.chomp(value).trim());
                            }
                            value = address.getPostcode();
                            subPropertyMapping = currentMappings.get(ADR_POSTAL_CODE);
                            if(subPropertyMapping != null && value != null && !value.isEmpty()){
                                // add string -> this is no natural language text
                                current.add(subPropertyMapping.uri, StringUtils.chomp(value).trim());
                            }
                            value = address.getCountry();
                            subPropertyMapping = currentMappings.get(ADR_COUNTRY);
                            if(subPropertyMapping != null && value != null && !value.isEmpty()){
                                // add string -> based on the standard this should be the two letter code
                                current.add(subPropertyMapping.uri, StringUtils.chomp(value).trim());
                            }
                           
                        } //else empty ADR field -> ignore
                        break;
                    case ORG:
                        Org org = (Org)property;
                        String[] unitHierarchy = org.getValues();
                        Mapping orgNameMapping = currentMappings.get(OntologyMappings.ORG_NAME);
                        if(unitHierarchy.length>0 && orgNameMapping != null &&
                                unitHierarchy[0] != null && unitHierarchy[0].trim().length()>0){
                            String orgName = unitHierarchy[0];
                            if(current == null){ //create new Representation for the Organisation
                                //Note: this is an Entity and no sub-Resource!
                                String orgEntityId = entityByName(entityMap, EntityType.organization,
                                    orgName, null, false);
                                if(orgEntityId == null){
                                    //create new Entity for this Organization
                                    orgEntityId = entityByName(entityMap, EntityType.organization,
                                        orgName, null, true);
                                    current = vf.createRepresentation(orgEntityId);
                                    initSubRepresentation(current, rep, mapping);
                                    representations.put(current.getId(), current);
                                    current.addNaturalText(orgNameMapping.uri, StringUtils.chomp(orgName).trim());
                                    //TODO: inverse relation form the ORG to the
                                    // Person can not be supported without caching
                                    // organisations. Therefore delete this relation for now
                                    if(mapping.invUri != null){
                                        current.removeAll(mapping.invUri);
                                    }
                                    //TODO: Organisation units are not supported
                                } else {
                                    rep.addReference(mapping.uri, orgEntityId);
                                }
                            }
                        }
                        break;
                    default:
                        if(current != null && mapping != null){
                            String value = property.getValue();
                            if(value != null){
                                value = StringUtils.chomp(property.getValue()).trim();
                            }
                            if(value.isEmpty()){
                                log.warn("Unable to index empty value for property {} of vCard {}",
                                    property.getId().getPropertyName(),rep.getId());
                            } else {
                                current.addNaturalText(mapping.uri, value);
                            }
                        } else if(mapping != null){
                            log.warn("Sub-Resources are not supported for Property {} (mapping to {} ignored)!",
                                propName,mapping);
                        } //else no mapping defined
                        break;
                }
                String value = property.getValue();
                log.debug(" - {}: {}",propertyId.getPropertyName(),value);
                for(Parameter param : property.getParameters()){
                    Parameter.Id paramId = param.getId();
                    String paramValue = param.getValue();
                    log.debug("   {}:{}",paramId.getPname(),paramValue);
                }
            } else {
                log.debug("No mapping for Property {} with value {}",propertyId,property.getValue());
            }
        }
        log.debug(" > Mapped Data;");
        if(log.isDebugEnabled()){
            for(Representation tmp : representations.values()){
                log.info(ModelUtils.getRepresentationInfo(tmp));
            }
        }
        log.debug("--- end ---");
        return representations.values().iterator();
    }

    /**
     * @param entityMap the map with all the Entity name -&gt; id mappings
     * @param entityType the type of the entity to search
     * @param name the name of the Entity
     * @param id optionally an id other than the name otherwise the name is used
     * @param create if <code>true</code> is parsed a new Entity is created even
     * if a entity with the same name already exists
     * @return the id of the created or found Entity
     */
    private String entityByName(Map<EntityType,Map<String,Set<String>>> entityMap,
                                EntityType entityType,
                                String name,
                                String id,
                                boolean create) {
        if(id == null) {
            id = name;
        }
        //lookup the existing entities of that type and name
        Set<String> entities = entityMap.get(entityType).get(name);
        if(entities == null){ //if none -> we will create one in this method
            entities = new HashSet<String>(2); //use lower size to save memory
            entityMap.get(entityType).put(name, entities);
        }  
        //make ids only to use ASKII chars and no white spaces
        id = id.replace(' ', '-');
        try { // encode special chars
            //TODO: replace that by ASKII folding
            id = URLEncoder.encode(id, "utf8");
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("This Plattform does not support 'utf8' encoding :(");
        }
        //add prefixes and so on
        id = prefix+entityType+typeSeperatorChar+id;
        //now we have the id
        if(!create){
            //NOTE: this would always return the first Entity if multiple Entities
            // would have been created by using the ceckId method.
            return entities.contains(id) ? id : null;
        } else { //we need to create a new entity
            id = checkId(id, entities);
            entities.add(id);
            return id;
        }
    }

    /**
     * Create a sub-representation by considering the base {@link Representation},
     * IDs already taken by other sub representations. The Id addon the caller
     * would like to add to the id of the base representation. In addition it
     * adds the relation between the base and the sub-representation as well as
     * the type and the inverse links to the sub-representation.
     * @param base the base (parent) representation
     * @param addon the string addon to the id of the base
     * @param takenIds set of IDs that are already taken
     * @param mapping the mapping used to get the information needed to correctly
     * initialise the sub-relation
     */
    private Representation createSubRepresentation(Representation base, String addon, Set<String> takenIds, Mapping mapping) {
        Representation current =  vf.createRepresentation(
            checkId(base.getId()+addon, takenIds));
        initSubRepresentation(current, base, mapping);
        return current;

    }

    /**
     * Initialise the parsed sub-representation by adds the relation between
     * the base and the sub-representation as well as
     * the rdf:type of the sub-relation and the inverse link if the sub- to the
     * base representation.
     * @param toInit The representation to initialise
     * @param base the parent representation
     * @param mapping the mapping
     */
    private void initSubRepresentation(Representation toInit, Representation base, Mapping mapping) {
        Mapping typeMapping = mapping.subMappings.get(RDF_TYPE);
        if(typeMapping != null){
            toInit.addReference(NamespaceEnum.rdf+"type", typeMapping.uri);
        }
        base.addReference(mapping.uri, toInit.getId());
        if(mapping.invUri != null){
            toInit.addReference(mapping.invUri, base.getId());
        }
    }

    /**
     * Adds "-{i}" to the end of the parsed ID until it does no longer conflict
     * with already taken IDs
     * @param id the id
     * @param taken already taken IDs
     * @return a id based on the parsed one that does not conflict with already
     * taken once.
     */
    private String checkId(String id, Set<String> taken) {
        String test = null;
        int i=0;
        while(taken.contains(i == 0 ? id : test)){
            i++;
            test = id+'-'+i;
        }
        if(test != null){
            id = test;
        }
        return id;
    }
    public static void main(String[] args) throws Exception {
     VcardIndexingSource instance = new VcardIndexingSource();
    
     instance.prefix = "http://test.org/";
     VCardBuilder parser = new VCardBuilder(new InputStreamReader(new FileInputStream(new File(args[0])), "utf8"));
     Map<EntityType,Map<String,Set<String>>> entityMap = new EnumMap<EntityType,Map<String,Set<String>>>(EntityType.class);
     entityMap.put(EntityType.organization, new HashMap<String,Set<String>>());
     entityMap.put(EntityType.person,  new HashMap<String,Set<String>>());
     for(VCard vcard : parser.buildAll()){
         instance.processVcard(vcard,OntologyMappings.schemaOrgMappings,entityMap);
     }
    }
   
}
TOP

Related Classes of org.apache.stanbol.entityhub.indexing.source.vcard.VcardIndexingSource$VCardIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.