Package org.apache.marmotta.ldclient.provider.facebook

Source Code of org.apache.marmotta.ldclient.provider.facebook.FacebookGraphProvider$ResponseHandler

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.marmotta.ldclient.provider.facebook;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.DateUtils;
import org.apache.http.util.EntityUtils;
import org.apache.marmotta.commons.http.UriUtil;
import org.apache.marmotta.commons.vocabulary.DCTERMS;
import org.apache.marmotta.commons.vocabulary.FOAF;
import org.apache.marmotta.commons.vocabulary.SCHEMA;
import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
import org.apache.marmotta.ldclient.api.provider.DataProvider;
import org.apache.marmotta.ldclient.exception.DataRetrievalException;
import org.apache.marmotta.ldclient.model.ClientResponse;
import org.openrdf.model.Model;
import org.openrdf.model.URI;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.TreeModel;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.SKOS;
import org.openrdf.repository.RepositoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* A provider that accesses objects exposed by the Facebook Graph API (in JSON format). The provider will map the
* properties of Facebook Objects to RDF and tries choosing the most appropriate RDF types and vocabularies. The base
* vocabulary used for describing objects is dcterms; the rdf types of objects will be taken from schema.org.
* <p/>
* @see    <a href="http://developers.facebook.com/docs/reference/api/">Facebook Graph API</a>
* @author Sebastian Schaffert (sschaffert@apache.org)
*/
public class FacebookGraphProvider implements DataProvider {

    public static final String PROVIDER_NAME = "Facebook Graph API";


    private static final Pattern pattern = Pattern.compile("http://www\\.facebook\\.com/([^/]+/)*([^/]+)");
    public static final int RETRY_AFTER = 60;

    private static Logger log = LoggerFactory.getLogger(FacebookGraphProvider.class);

    private static String[] defaultLanguages = new String[] {"en", "de", "fr", "es", "it"};

    private static Map<String,URI> facebookCategories = new HashMap<String, URI>();
    static {
        // see http://www.marketinggum.com/types-of-facebook-pages-for-business/

        // Local Business or Place
        facebookCategories.put("attractions/things to do", SCHEMA.TouristAttraction);
        facebookCategories.put("bank", SCHEMA.BankOrCreditUnion);
        facebookCategories.put("bar", SCHEMA.BarOrPub);
        facebookCategories.put("book store", SCHEMA.BookStore);
        facebookCategories.put("concert venue", SCHEMA.MusicVenue);
        facebookCategories.put("food/grocery", SCHEMA.GroceryStore);
        facebookCategories.put("hotel", SCHEMA.Hotel);
        facebookCategories.put("local business", SCHEMA.LocalBusiness);
        facebookCategories.put("movie theatre", SCHEMA.MovieTheater);
        facebookCategories.put("museum/art gallery", SCHEMA.Museum);
        facebookCategories.put("outdoor gear/sporting goods", SCHEMA.SportingGoodsStore);
        facebookCategories.put("real estate", SCHEMA.RealEstateAgent);
        facebookCategories.put("restaurant/café", SCHEMA.CafeOrCoffeeShop);
        facebookCategories.put("arts/entertainment/nightlife", SCHEMA.NightClub);
        facebookCategories.put("school", SCHEMA.School);


        // Company Organization or Institution
        facebookCategories.put("automobiles and parts", SCHEMA.AutoPartsStore);
        facebookCategories.put("church", SCHEMA.Church);
        facebookCategories.put("company", SCHEMA.Corporation);
        facebookCategories.put("computers/technology", SCHEMA.ComputerStore);
        facebookCategories.put("consulting/business services", SCHEMA.Corporation);
        facebookCategories.put("insurance company", SCHEMA.InsuranceAgency);
        facebookCategories.put("internet/software", SCHEMA.SoftwareApplication);
        facebookCategories.put("legal/law", SCHEMA.Attorney);
        facebookCategories.put("retail and consumer merchandise", SCHEMA.ConvenienceStore);
        facebookCategories.put("media/news/publishing", SCHEMA.Corporation);
        facebookCategories.put("travel/leisure", SCHEMA.TravelAgency);


        // Brand or Product
        facebookCategories.put("app", SCHEMA.MobileApplication);
        facebookCategories.put("appliance", SCHEMA.Product);
        facebookCategories.put("baby goods/kids goods", SCHEMA.Product);
        facebookCategories.put("cars", SCHEMA.AutoPartsStore);
        facebookCategories.put("clothing", SCHEMA.ClothingStore);
        facebookCategories.put("electronics", SCHEMA.ElectronicsStore);
        facebookCategories.put("food/beverages", SCHEMA.FoodEstablishment);
        facebookCategories.put("furniture", SCHEMA.FurnitureStore);
        facebookCategories.put("games/toys", SCHEMA.ToyStore);
        facebookCategories.put("health/beauty", SCHEMA.BeautySalon);
        facebookCategories.put("jewelry/watches", SCHEMA.JewelryStore);
        facebookCategories.put("kitchen/cooking", SCHEMA.HomeGoodsStore);
        facebookCategories.put("pet supplies", SCHEMA.PetStore);
        facebookCategories.put("vitamins/minerals", SCHEMA.Product);



        // Artist, Band or Public Figure
        facebookCategories.put("actor/director", SCHEMA.Person);
        facebookCategories.put("artist", SCHEMA.Person);
        facebookCategories.put("athlete", SCHEMA.Person);
        facebookCategories.put("author", SCHEMA.Person);
        facebookCategories.put("business person", SCHEMA.Person);
        facebookCategories.put("chef", SCHEMA.Person);
        facebookCategories.put("coach", SCHEMA.Person);
        facebookCategories.put("doctor", SCHEMA.Person);
        facebookCategories.put("entertainer", SCHEMA.Person);
        facebookCategories.put("journalist", SCHEMA.Person);
        facebookCategories.put("lawyer", SCHEMA.Person);
        facebookCategories.put("musician/band", SCHEMA.MusicGroup);
        facebookCategories.put("politician", SCHEMA.Person);
        facebookCategories.put("teacher", SCHEMA.Person);
        facebookCategories.put("writer", SCHEMA.Person);


        // entertainment
        facebookCategories.put("movie",SCHEMA.Movie);
        facebookCategories.put("album",SCHEMA.MusicAlbum);
        facebookCategories.put("book", SCHEMA.Book);
        facebookCategories.put("concert tour", SCHEMA.MusicEvent);
        facebookCategories.put("library", SCHEMA.Library);
        facebookCategories.put("magazine", SCHEMA.CreativeWork);
        facebookCategories.put("radio station", SCHEMA.RadioStation);
        facebookCategories.put("record label", SCHEMA.Brand);
        facebookCategories.put("sports venue", SCHEMA.SportsActivityLocation);
        facebookCategories.put("tv channel", SCHEMA.TelevisionStation);
        facebookCategories.put("tv show", SCHEMA.TVSeries);

        // NGO
        facebookCategories.put("non-profit organization", SCHEMA.NGO);
        facebookCategories.put("cause", SCHEMA.NGO);
        facebookCategories.put("health/medical/pharmaceuticals", SCHEMA.MedicalOrganization);
        facebookCategories.put("health/medical/pharmacy", SCHEMA.MedicalOrganization);
        facebookCategories.put("community/government", SCHEMA.GovernmentOrganization);
        facebookCategories.put("church/religious organization", SCHEMA.Church);
        facebookCategories.put("education", SCHEMA.EducationalOrganization);
        facebookCategories.put("hospital/clinic", SCHEMA.Hospital);

        // interests
        facebookCategories.put("interest", SKOS.CONCEPT);
    }

    /**
     * Parse the HTTP response entity returned by the web service call and return its contents in a Sesame RDF
     * repository also passed as argument. The content type returned by the web service is passed as argument to help
     * the implementation decide how to parse the data. The implementation can return a list of additional pages to
     * retrieve for completing the data of the resource
     *
     * @param resourceUri
     * @param model       an RDF model for storing an RDF representation of the dataset located at the remote resource.
     * @param in          input stream as returned by the remote webservice
     * @param language    content language as returned in the HTTP headers of the remote webservice
     * @return a possibly empty list of URLs of additional resources to retrieve to complete the content
     * @throws java.io.IOException in case an error occurs while reading the input stream
     */
    protected List<String> parseResponse(String resourceUri, String requestUrl, Model model, InputStream in, String language) throws DataRetrievalException {
        ObjectMapper mapper = new ObjectMapper();

        try {
            Map<String,Object> data = mapper.readValue(in, new TypeReference<Map<String,Object>>() { });

            ValueFactory vf = ValueFactoryImpl.getInstance();

            URI subject = vf.createURI(resourceUri);

            // add the type based on the facebook category
            if(data.get("category") != null) {
                model.add(subject, RDF.TYPE, getType(data.get("category").toString()));
            }

            model.add(subject,DCTERMS.identifier,vf.createLiteral(data.get("id").toString()));

            // schema:name is the facebook name (can have multiple languages)
            model.add(subject, SCHEMA.name, vf.createLiteral(data.get("name").toString(), language));
            model.add(subject, DCTERMS.title, vf.createLiteral(data.get("name").toString(), language));

            // dct:description in case a description or about is present (all content in English)
            if(data.get("description") != null) {
                model.add(subject,SCHEMA.description, vf.createLiteral(data.get("description").toString(), "en"));
                model.add(subject,DCTERMS.description, vf.createLiteral(data.get("description").toString(), "en"));
            }
            if(data.get("about") != null) {
                model.add(subject,SCHEMA.description, vf.createLiteral(data.get("about").toString(), "en"));
                model.add(subject,DCTERMS.description, vf.createLiteral(data.get("about").toString(), "en"));
            }

            // if there is genre information, add it using schema:genre and dct:subject
            if(data.get("genre") != null) {
                model.add(subject,SCHEMA.genre, vf.createLiteral(data.get("genre").toString()));
                model.add(subject,DCTERMS.subject, vf.createLiteral(data.get("genre").toString()));
            }
            if(data.get("directed_by") != null) {
                model.add(subject,SCHEMA.director, vf.createLiteral(data.get("directed_by").toString()));
                model.add(subject,DCTERMS.creator, vf.createLiteral(data.get("directed_by").toString()));
            }
            if(data.get("studio") != null) {
                model.add(subject,SCHEMA.publisher, vf.createLiteral(data.get("studio").toString()));
                model.add(subject,DCTERMS.publisher, vf.createLiteral(data.get("studio").toString()));
            }
            if(data.get("plot_outline") != null) {
                model.add(subject,SCHEMA.description, vf.createLiteral(data.get("plot_outline").toString()));
                model.add(subject,DCTERMS.description, vf.createLiteral(data.get("plot_outline").toString()));
            }
            if(data.get("phone") != null) {
                model.add(subject,SCHEMA.telephone, vf.createLiteral(data.get("phone").toString()));
                model.add(subject,FOAF.phone, vf.createLiteral(data.get("phone").toString()));
            }
            if(data.get("username") != null) {
                model.add(subject,FOAF.nick, vf.createLiteral(data.get("username").toString()));
            }

            if(data.get("cover") != null && data.get("cover") instanceof Map && ((Map<?,?>)data.get("cover")).get("source") != null) {
                model.add(subject,FOAF.thumbnail, vf.createURI(((Map<?,?>) data.get("cover")).get("source").toString()));
            }



            // website
            if(data.get("website") != null && UriUtil.validate(data.get("website").toString())) {
                model.add(subject, FOAF.homepage, vf.createURI(data.get("website").toString()));
            }
            if(data.get("link") != null) {
                model.add(subject, FOAF.homepage, vf.createURI(data.get("link").toString()));
            }

        } catch (JsonMappingException e) {
            throw new DataRetrievalException("error while mapping JSON response",e);
        } catch (JsonParseException e) {
            throw new DataRetrievalException("error while parsing JSON response",e);
        } catch (IOException e) {
            throw new DataRetrievalException("error while accessing Facebook Graph API",e);
        }

        return Collections.emptyList();
    }

    /**
     * Return an appropriate RDF type for the given facebook category. Since this is not really documented in facebook,
     * not all categories will be supported. The fallback type is "foaf:Document".
     *
     * @param facebookCategory
     * @return
     */

    private URI getType(String facebookCategory) {
        if(facebookCategories.get(facebookCategory.toLowerCase()) != null) {
            return facebookCategories.get(facebookCategory.toLowerCase());
        } else {
            return FOAF.Document;
        }

    }


    /**
     * Return the name of this data provider. To be used e.g. in the configuration and in log messages.
     *
     * @return
     */
    @Override
    public String getName() {
        return PROVIDER_NAME;
    }

    /**
     * Return the list of mime types accepted by this data provider.
     *
     * @return
     */
    @Override
    public String[] listMimeTypes() {
        return new String[] { "application/json"};
    }




    /**
     * Retrieve the data for a Facebook resource using the given http client and endpoint definition. Since Facebook
     * returns multiple language versions for an object depending on the Accept-Language header, we will issue several
     * requests to the same URL to get all the data. If the endpoint definition contains a property "languages"
     * whose value is an array of language strings, this property is used. Otherwise it reverts to defaultLanguages.
     *
     *
     *
     * @param resourceUri the resource to be retrieved
     * @param endpoint the endpoint definition
     * @return a completely specified client response, including expiry information and the set of triples
     */
    @Override
    public ClientResponse retrieveResource(String resourceUri, LDClientService client, Endpoint endpoint) throws DataRetrievalException {

        try {

            String contentType = "application/json";

            long defaultExpires = client.getClientConfiguration().getDefaultExpiry();
            if(endpoint != null && endpoint.getDefaultExpiry() != null) {
                defaultExpires = endpoint.getDefaultExpiry();
            }

            final ResponseHandler handler = new ResponseHandler(resourceUri, endpoint);


            String requestUri;
            log.info("retrieving Facebook Graph object {}",resourceUri);

            // if a URI starts with http://www.facebook.com we redirect to the object in the Graph API at http://graph.facebook.com
            Matcher matcher = pattern.matcher(resourceUri);
            if(matcher.matches()) {
                log.info("... redirecting to {}",resourceUri);
                requestUri = "http://graph.facebook.com/"+matcher.group(2);
            } else {
                requestUri = resourceUri;
            }

            String[] languages;
            if(endpoint.getProperty("languages") != null) {
                languages = endpoint.getProperty("languages").split(",");
            } else {
                languages = defaultLanguages;
            }

            for(String lang : languages) {
                HttpGet get = new HttpGet(requestUri);
                try {
                    get.setHeader("Accept",contentType);
                    get.setHeader("Accept-Language", lang);

                    log.info("retrieving resource data for {} from '{}' endpoint, request URI is <{}>", new Object[]  {resourceUri, getName(), get.getURI().toASCIIString()});

                    handler.requestUrl = requestUri;
                    handler.language   = lang;
                    client.getClient().execute(get, handler);
                } finally {
                    get.releaseConnection();
                }
            }

            Date expiresDate = handler.expiresDate;
            if (expiresDate == null) {
                expiresDate = new Date(System.currentTimeMillis() + defaultExpires * 1000);
            }

            long min_expires = System.currentTimeMillis() + client.getClientConfiguration().getMinimumExpiry() * 1000;
            if (expiresDate.getTime() < min_expires) {
                log.info("expiry time returned by request lower than minimum expiration time; using minimum time instead");
                expiresDate = new Date(min_expires);
            }

            if(log.isInfoEnabled()) {
                log.info("retrieved {} triples for resource {}; expiry date: {}", new Object[]{handler.triples.size(), resourceUri, expiresDate});
            }

            ClientResponse result = new ClientResponse(200, handler.triples);
            result.setExpires(expiresDate);
            return result;
        } catch (RepositoryException e) {
            log.error("error while initialising Sesame repository; classpath problem?",e);
            throw new DataRetrievalException("error while initialising Sesame repository; classpath problem?",e);
        } catch (ClientProtocolException e) {
            log.error("HTTP client error while trying to retrieve resource {}: {}", resourceUri, e.getMessage());
            throw new DataRetrievalException("I/O error while trying to retrieve resource "+resourceUri,e);
        } catch (IOException e) {
            log.error("I/O error while trying to retrieve resource {}: {}", resourceUri, e.getMessage());
            throw new DataRetrievalException("I/O error while trying to retrieve resource "+resourceUri,e);
        } catch(RuntimeException ex) {
            log.error("Unknown error while trying to retrieve resource {}: {}", resourceUri, ex.getMessage());
            throw new DataRetrievalException("Unknown error while trying to retrieve resource "+resourceUri,ex);
        }

    }


    protected class ResponseHandler implements org.apache.http.client.ResponseHandler<List<String>> {

        private Date             expiresDate;

        private String                requestUrl;

        // the repository where the triples will be stored in case the data providers return them
        private final Model triples;

        private final Endpoint   endpoint;

        private final String resource;

        private int httpStatus;

        // language tag to use for literals
        public String language;

        public ResponseHandler(String resource, Endpoint endpoint) throws RepositoryException {
            this.resource = resource;
            this.endpoint = endpoint;

            triples = new TreeModel();
        }

        @Override
        public List<String> handleResponse(HttpResponse response) throws IOException {
            ArrayList<String> requestUrls = new ArrayList<String>();

            if (response.getStatusLine().getStatusCode() >= 200 && response.getStatusLine().getStatusCode() < 400) {
                final HttpEntity entity = response.getEntity();
                if (entity == null)
                    throw new IOException("no content returned by Linked Data resource " + resource);

                this.httpStatus = response.getStatusLine().getStatusCode();

                if (entity != null) {
                    InputStream in = entity.getContent();
                    try {

                        List<String> urls = parseResponse(resource, requestUrl, triples, in, language);
                        requestUrls.addAll(urls);

                        if (expiresDate == null) {
                            Header expires = response.getFirstHeader("Expires");
                            if (expires != null) {
                                expiresDate = DateUtils.parseDate(expires.getValue());
                            }
                        }

                    } catch (DataRetrievalException e) {
                        // FIXME: get.abort();
                        throw new IOException(e);
                    } finally {
                        in.close();
                    }
                }
                EntityUtils.consume(entity);
            } else if(response.getStatusLine().getStatusCode() == 500 || response.getStatusLine().getStatusCode() == 503  || response.getStatusLine().getStatusCode() == 504) {
                this.httpStatus = response.getStatusLine().getStatusCode();

                Header retry = response.getFirstHeader("Retry-After");
                if(retry != null) {
                    try {
                        int duration = Integer.parseInt(retry.getValue());
                        expiresDate = new Date(System.currentTimeMillis() + duration*1000);
                    } catch(NumberFormatException ex) {
                        log.debug("error parsing Retry-After: header");
                    }
                } else {
                    expiresDate = new Date(System.currentTimeMillis() + RETRY_AFTER *1000);
                }

            } else {
                log.error("the HTTP request failed (status: {})", response.getStatusLine());
                throw new ClientProtocolException("the HTTP request failed (status: " + response.getStatusLine() + ")");
            }

            return requestUrls;
        }

        public Endpoint getEndpoint() {
            return endpoint;
        }

        public int getHttpStatus() {
            return httpStatus;
        }

    }

}
TOP

Related Classes of org.apache.marmotta.ldclient.provider.facebook.FacebookGraphProvider$ResponseHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.