Package org.apache.marmotta.ldclient.services.provider

Source Code of org.apache.marmotta.ldclient.services.provider.AbstractHttpProvider$ResponseHandler

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.marmotta.ldclient.services.provider;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.cookie.DateParseException;
import org.apache.http.impl.cookie.DateUtils;
import org.apache.http.util.EntityUtils;
import org.apache.marmotta.commons.collections.CollectionUtils;
import org.apache.marmotta.commons.http.ContentType;
import org.apache.marmotta.ldclient.api.endpoint.Endpoint;
import org.apache.marmotta.ldclient.api.ldclient.LDClientService;
import org.apache.marmotta.ldclient.api.provider.DataProvider;
import org.apache.marmotta.ldclient.exception.DataRetrievalException;
import org.apache.marmotta.ldclient.model.ClientResponse;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.sail.memory.MemoryStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;

import static org.apache.marmotta.commons.http.LMFHttpUtils.parseContentType;

/**
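* Abstract base class for data providers that retrieve the data of a resource over HTTP. It implements the
* generic request loop, Accept header construction and expiry handling in {@link #retrieveResource}; subclasses
* supply the request URLs ({@link #buildRequestUrl}) and the response parser ({@link #parseResponse}).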
* <p/>
* Author: Sebastian Schaffert
*/
public abstract class AbstractHttpProvider implements DataProvider {

    public static final int RETRY_AFTER = 60;
    private static Logger log = LoggerFactory.getLogger(AbstractHttpProvider.class);

    /**
     * Build the URLs to use to call the webservice in order to retrieve the data for the resource passed as argument.
     * In many cases, this will just return the URI of the resource (e.g. Linked Data), but there might be data providers
     * that use different means for accessing the data for a resource, e.g. SPARQL or a Cache.
     * <p/>
     * The returned URLs are added to the request queue of
     * {@link #retrieveResource(String, LDClientService, Endpoint)} and requested in list order. The list is passed
     * directly to {@code Queue.addAll}, so implementations must not return {@code null}.
     *
     * @param resourceUri the URI of the resource whose data should be retrieved
     * @param endpoint endpoint configuration for the data provider (optional)
     * @return the list of request URLs to retrieve for the resource
     * @throws DataRetrievalException if the implementation cannot build the request URLs
     */
    protected abstract List<String> buildRequestUrl(String resourceUri, Endpoint endpoint) throws DataRetrievalException;

    /**
     * Parse the HTTP response entity returned by the web service call and return its contents in a Sesame RDF
     * repository also passed as argument. The content type returned by the web service is passed as argument to help
     * the implementation decide how to parse the data. The implementation can return a list of additional pages to
     * retrieve for completing the data of the resource.
     * <p/>
     * The input stream is closed by the caller after this method returns or throws.
     *
     * @param resourceUri  the URI of the resource whose data is being retrieved
     * @param requestUrl   the URL that was requested to obtain this response
     * @param repository   an RDF repository for storing an RDF representation of the dataset located at the remote resource.
     * @param in           input stream as returned by the remote webservice
     * @param contentType  content type as returned in the HTTP headers of the remote webservice, without the
     *                     parameters following the first ';'. The caller passes "application/sparql-results+xml"
     *                     for endpoints of type "SPARQL" and "application/rdf+xml" when the response carries no
     *                     content type
     * @return a possibly empty list of URLs of additional resources to retrieve to complete the content; the
     *         caller adds the result to its own list, so it must not be {@code null}
     * @throws DataRetrievalException in case the response cannot be parsed; the caller wraps it in an
     *                                {@link java.io.IOException}
     */
    protected abstract List<String> parseResponse(String resourceUri, String requestUrl, Repository repository, InputStream in, String contentType) throws DataRetrievalException;

    /**
     * Retrieve the data for a resource using the given http client and endpoint definition. The service is
     * supposed to manage the connection handling itself. See {@link AbstractHttpProvider}
     * for a generic implementation of this method.
     *
     *
     *
     * @param resource the resource to be retrieved
     * @param endpoint the endpoint definition
     * @return a completely specified client response, including expiry information and the set of triples
     */
    @Override
    public ClientResponse retrieveResource(String resource, LDClientService client, Endpoint endpoint) throws DataRetrievalException {

        try {

            String contentType;
            if(endpoint != null && endpoint.getContentTypes().size() > 0) {
                contentType = CollectionUtils.fold(endpoint.getContentTypes(), new CollectionUtils.StringSerializer<ContentType>() {
                    @Override
                    public String serialize(ContentType contentType) {
                        return contentType.toString("q");
                    }
                },",");
            } else {
                contentType = CollectionUtils.fold(Arrays.asList(listMimeTypes()), ",");
            }

            long defaultExpires = client.getClientConfiguration().getDefaultExpiry();
            if(endpoint != null && endpoint.getDefaultExpiry() != null) {
                defaultExpires = endpoint.getDefaultExpiry();
            }

            final ResponseHandler handler = new ResponseHandler(resource, endpoint);

            // a queue for queuing the request URLs needed to build the query response
            Queue<String> requestUrls = new LinkedList<String>();
            requestUrls.addAll(buildRequestUrl(resource, endpoint));

            Set<String> visited = new HashSet<String>();

            String requestUrl = requestUrls.poll();
            while(requestUrl != null) {

                if(!visited.contains(requestUrl)) {
                    HttpGet get = new HttpGet(requestUrl);
                    try {
                        get.setHeader("Accept",contentType);
                        get.setHeader("Accept-Language", "*"); // PoolParty compatibility

                        log.info("retrieving resource data for {} from '{}' endpoint, request URI is <{}>", new Object[]  {resource, getName(), get.getURI().toASCIIString()});

                        handler.requestUrl = requestUrl;
                        List<String> additionalRequestUrls = client.getClient().execute(get, handler);
                        requestUrls.addAll(additionalRequestUrls);

                        visited.add(requestUrl);
                    } finally {
                        get.releaseConnection();
                    }
                }

                requestUrl = requestUrls.poll();
            }

            Date expiresDate = handler.expiresDate;
            if (expiresDate == null) {
                expiresDate = new Date(System.currentTimeMillis() + defaultExpires * 1000);
            }

            long min_expires = System.currentTimeMillis() + client.getClientConfiguration().getMinimumExpiry() * 1000;
            if (expiresDate.getTime() < min_expires) {
                log.info("expiry time returned by request lower than minimum expiration time; using minimum time instead");
                expiresDate = new Date(min_expires);
            }

            if(log.isInfoEnabled()) {
                RepositoryConnection con = handler.triples.getConnection();
                log.info("retrieved {} triples for resource {}; expiry date: {}",new Object[] {con.size(),resource,expiresDate});
                con.close();
            }

            ClientResponse result = new ClientResponse(handler.httpStatus, handler.triples);
            result.setExpires(expiresDate);
            return result;
        } catch (RepositoryException e) {
            log.error("error while initialising Sesame repository; classpath problem?",e);
            throw new DataRetrievalException("error while initialising Sesame repository; classpath problem?",e);
        } catch (ClientProtocolException e) {
            log.error("HTTP client error while trying to retrieve resource {}: {}", resource, e.getMessage());
            throw new DataRetrievalException("I/O error while trying to retrieve resource "+resource,e);
        } catch (IOException e) {
            log.error("I/O error while trying to retrieve resource {}: {}", resource, e.getMessage());
            throw new DataRetrievalException("I/O error while trying to retrieve resource "+resource,e);
        } catch(RuntimeException ex) {
            log.error("Unknown error while trying to retrieve resource {}: {}", resource, ex.getMessage());
            throw new DataRetrievalException("Unknown error while trying to retrieve resource "+resource,ex);
        }

    }

    /**
     * Check whether the content type returned by the server is acceptable to the endpoint and data provider
     */
    protected boolean isValidContentType(String contentType, Endpoint endpoint) {
        if(endpoint != null && endpoint.getContentTypes().size() > 0) {
            ContentType parsed = parseContentType(contentType);
            for(ContentType valid : endpoint.getContentTypes()) {
                if(valid.matches(parsed) || valid.matchesWildcard(parsed)) {
                    return true;
                }
            }
            return false;
        } else {
            // TODO: should probably be removed, since it is not used
            for(String type : listMimeTypes()) {
                if(type.split(";")[0].equalsIgnoreCase(contentType)) return true;
            }
            return false;
        }
    }

    private class ResponseHandler implements org.apache.http.client.ResponseHandler<List<String>> {

        private Date             expiresDate;

        private String                requestUrl;

        // the repository where the triples will be stored in case the data providers return them
        private final Repository triples;

        private final Endpoint   endpoint;

        private final String resource;

        private int httpStatus;

        public ResponseHandler(String resource, Endpoint endpoint) throws RepositoryException {
            this.resource = resource;
            this.endpoint = endpoint;

            triples = new SailRepository(new MemoryStore());
            triples.initialize();
        }

        @Override
        public List<String> handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
            ArrayList<String> requestUrls = new ArrayList<String>();

            if (response.getStatusLine().getStatusCode() >= 200 && response.getStatusLine().getStatusCode() < 400) {
              final HttpEntity entity = response.getEntity();
              if (entity == null)
                throw new IOException("no content returned by Linked Data resource " + resource);

              if (!isValidContentType(entity.getContentType().getValue().split(";")[0], endpoint)) {
                  // FIXME: here was get.abort()
                throw new IOException("invalid content returned by Linked Data resource " + resource + ": "
                    + entity.getContentType().getValue());
              }

                this.httpStatus = response.getStatusLine().getStatusCode();

                if (entity != null) {
                    String parseContentType = "application/rdf+xml";
                    if (endpoint != null && "SPARQL".equals(endpoint.getType())) {
                        parseContentType = "application/sparql-results+xml";
                    } else if (entity.getContentType() != null) {
                        parseContentType = entity.getContentType().getValue().split(";")[0];
                    }

                    InputStream in = entity.getContent();
                    try {

                        List<String> urls = parseResponse(resource, requestUrl, triples, in, parseContentType);
                        requestUrls.addAll(urls);

                        if (expiresDate == null) {
                            Header expires = response.getFirstHeader("Expires");
                            if (expires != null) {
                                try {
                                    expiresDate = DateUtils.parseDate(expires.getValue());
                                } catch (DateParseException e) {
                                    log.debug("error parsing Expires: header");
                                }
                            }
                        }

                    } catch (DataRetrievalException e) {
                        // FIXME: get.abort();
                        throw new IOException(e);
                    } finally {
                        in.close();
                    }
                }
                EntityUtils.consume(entity);
            } else if(response.getStatusLine().getStatusCode() == 500 || response.getStatusLine().getStatusCode() == 503  || response.getStatusLine().getStatusCode() == 504) {
                this.httpStatus = response.getStatusLine().getStatusCode();

                Header retry = response.getFirstHeader("Retry-After");
                if(retry != null) {
                    try {
                        int duration = Integer.parseInt(retry.getValue());
                        expiresDate = new Date(System.currentTimeMillis() + duration*1000);
                    } catch(NumberFormatException ex) {
                        log.debug("error parsing Retry-After: header");
                    }
                } else {
                    expiresDate = new Date(System.currentTimeMillis() + RETRY_AFTER *1000);
                }

            } else {
                log.error("the HTTP request failed (status: {})", response.getStatusLine());
                throw new ClientProtocolException("the HTTP request failed (status: " + response.getStatusLine() + ")");
            }

            return requestUrls;
        }

    }

}
TOP

Related Classes of org.apache.marmotta.ldclient.services.provider.AbstractHttpProvider$ResponseHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.