Package org.apache.http

Examples of org.apache.http.HttpEntity


  }

  public static int fetch(Page page, boolean ignoreIfBinary) {
    String toFetchURL = page.getWebURL().getURL();
    HttpGet get = null;
    HttpEntity entity = null;
    try {
      get = new HttpGet(toFetchURL);
      synchronized (mutex) {
        long now = (new Date()).getTime();
        if (now - startOfPeriod > 10000) {
          logger.info("Number of pages fetched per second: " + processedCount
              / ((now - startOfPeriod) / 1000));
          processedCount = 0;
          startOfPeriod = now;
        }
        processedCount++;

        if (now - lastFetchTime < politenessDelay) {
          Thread.sleep(politenessDelay - (now - lastFetchTime));
        }
        lastFetchTime = (new Date()).getTime();
      }
      HttpResponse response = httpclient.execute(get);
      entity = response.getEntity();

      int statusCode = response.getStatusLine().getStatusCode();
      if (statusCode != HttpStatus.SC_OK) {
        if (statusCode != HttpStatus.SC_NOT_FOUND) {
          if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY || statusCode == HttpStatus.SC_MOVED_TEMPORARILY) {
            Header header = response.getFirstHeader("Location");
            if (header != null) {
              String movedToUrl = header.getValue();
              page.getWebURL().setURL(movedToUrl);
            } else {
              page.getWebURL().setURL(null);
            }
            return PageFetchStatus.Moved;
          }
          logger.info("Failed: " + response.getStatusLine().toString() + ", while fetching " + toFetchURL);
        } else if (show404Pages) {
          logger.info("Not Found: " + toFetchURL + " (Link found in doc#: "
              + page.getWebURL().getParentDocid() + ")");
        }
        return response.getStatusLine().getStatusCode();
      }

      String uri = get.getURI().toString();
      if (!uri.equals(toFetchURL)) {
        if (!URLCanonicalizer.getCanonicalURL(uri).equals(toFetchURL)) {
          int newdocid = DocIDServer.getDocID(uri);
          if (newdocid != -1) {
            if (newdocid > 0) {
              return PageFetchStatus.RedirectedPageIsSeen;
            }
            WebURL webURL = new WebURL();
            webURL.setURL(uri);
            webURL.setDocid(DocIDServer.getNewDocID(uri));
            page.setWebURL(webURL);
          }
        }
      }

      if (entity != null) {
        long size = entity.getContentLength();
        if (size == -1) {
          Header length = response.getLastHeader("Content-Length");
          if (length == null) {
            length = response.getLastHeader("Content-length");
          }
          if (length != null) {
            size = Integer.parseInt(length.getValue());
          } else {
            size = -1;
          }
        }
        if (size > MAX_DOWNLOAD_SIZE) {
          entity.consumeContent();
          return PageFetchStatus.PageTooBig;
        }

        boolean isBinary = false;

        Header type = entity.getContentType();
        if (type != null) {
          String typeStr = type.getValue().toLowerCase();
          if (typeStr.contains("image") || typeStr.contains("audio") || typeStr.contains("video")) {
            isBinary = true;
            if (ignoreIfBinary) {
              return PageFetchStatus.PageIsBinary;
            }
          }
        }

        if (page.load(entity.getContent(), (int) size, isBinary)) {
          return PageFetchStatus.OK;
        } else {
          return PageFetchStatus.PageLoadError;
        }
      } else {
        get.abort();
      }
    } catch (IOException e) {
      logger.error("Fatal transport error: " + e.getMessage() + " while fetching " + toFetchURL
          + " (link found in doc #" + page.getWebURL().getParentDocid() + ")");
      return PageFetchStatus.FatalTransportError;
    } catch (IllegalStateException e) {
      // ignoring exceptions that occur because of not registering https
      // and other schemes
    } catch (Exception e) {
      if (e.getMessage() == null) {
        logger.error("Error while fetching " + page.getWebURL().getURL());
      } else {
        logger.error(e.getMessage() + " while fetching " + page.getWebURL().getURL());
      }
    } finally {
      try {
        if (entity != null) {
          entity.consumeContent();
        } else if (get != null) {
          get.abort();
        }
      } catch (Exception e) {
        e.printStackTrace();
View Full Code Here


         final HttpGet request = new HttpGet(uri);

         // Execute the request
         log.info("Executing request to: " + request.getURI());
         final HttpResponse response = client.execute(request);
         final HttpEntity entity = response.getEntity();
         if (entity == null)
         {
            TestCase.fail("Request returned no entity");
         }

         // Read the result, ensure it's what we're expecting (should be the value of request param "echo")
         final BufferedReader reader = new BufferedReader(new InputStreamReader(entity.getContent()));
         final String line = reader.readLine();
         log.info("Got response: " + line);
         Assert.assertEquals(echoValue, line);
      }
      finally
View Full Code Here

         final HttpGet request = new HttpGet(uri);

         // Execute the request
         log.info("Executing request to: " + request.getURI());
         final HttpResponse response = client.execute(request);
         final HttpEntity entity = response.getEntity();
         if (entity == null)
         {
            TestCase.fail("Request returned no entity");
         }

         // Read the result, ensure it's what we're expecting
         final BufferedReader reader = new BufferedReader(new InputStreamReader(entity.getContent()));
         final String line = reader.readLine();
         log.info("Got response: " + line);
         Assert.assertEquals(OutputLocalBusiness.OUTPUT, line);
      }
      finally
View Full Code Here

         public InputStream getInputStream() throws IOException
         {
            if (stream == null)
            {
               HttpEntity entity = res.getEntity();
               if (entity == null) return null;
               stream = new SelfExpandingBufferredInputStream(entity.getContent());
            }
            return stream;
         }

         public void performReleaseConnection()
View Full Code Here

   
    public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException {
        if (context == null) {
            throw new IllegalArgumentException("HTTP context may not be null");
        }
    HttpEntity entity = response.getEntity();
    if (entity != null) {
      Header ceheader = entity.getContentEncoding();
      if (ceheader != null) {
        HeaderElement[] codecs = ceheader.getElements();
        for (int i = 0; i < codecs.length; i++) {
          if (codecs[i].getName().equalsIgnoreCase(GZIP_CODEC)) {
//            response.removeHeader(ceheader);
View Full Code Here

     * @return the content as InputStream
     * @throws IOException
     */
  public InputStream getContentstream() throws IOException {
        if (this.httpResponse != null && this.currentRequest != null) {
            final HttpEntity httpEntity = this.httpResponse.getEntity();
            if (httpEntity != null) try {
                    return httpEntity.getContent();
            } catch (final IOException e) {
                ConnectionInfo.removeConnection(this.currentRequest.hashCode());
                this.currentRequest.abort();
                this.currentRequest = null;
                throw e;
View Full Code Here

     * @param outputStream
     * @throws IOException
     */
  public void writeTo(final OutputStream outputStream) throws IOException {
        if (this.httpResponse != null && this.currentRequest != null) {
            final HttpEntity httpEntity = this.httpResponse.getEntity();
            if (httpEntity != null) try {
                httpEntity.writeTo(outputStream);
                outputStream.flush();
                // Ensures that the entity content is fully consumed and the content stream, if exists, is closed.
                EntityUtils.consume(httpEntity);
                ConnectionInfo.removeConnection(this.currentRequest.hashCode());
                this.currentRequest = null;
View Full Code Here

     *
     * @throws IOException
     */
    public void finish() throws IOException {
        if (this.httpResponse != null) {
                final HttpEntity httpEntity = this.httpResponse.getEntity();
        if (httpEntity != null && httpEntity.isStreaming()) {
            // Ensures that the entity content is fully consumed and the content stream, if exists, is closed.
            EntityUtils.consume(httpEntity);
        }
        }
        if (this.currentRequest != null) {
View Full Code Here

      byte[] content = null;
      try {
            execute(httpUriRequest);
            if (this.httpResponse == null) return null;
            // get the response body
            final HttpEntity httpEntity = this.httpResponse.getEntity();
            if (httpEntity != null) {
                if (getStatusCode() == 200 && httpEntity.getContentLength() < maxBytes) {
                    try {
                        content = EntityUtils.toByteArray(httpEntity);
                    } catch (final OutOfMemoryError e) {
                        throw new IOException(e.toString());
                    }
View Full Code Here

      // statistics
      storeConnectionInfo(httpUriRequest);
      // execute the method; some asserts confirm that the request can be sent with Content-Length and is therefore not terminated by EOF
      if (httpUriRequest instanceof HttpEntityEnclosingRequest) {
          final HttpEntityEnclosingRequest hrequest = (HttpEntityEnclosingRequest) httpUriRequest;
          final HttpEntity entity = hrequest.getEntity();
          assert entity != null;
          //assert !entity.isChunked();
          //assert entity.getContentLength() >= 0;
          assert !hrequest.expectContinue();
      }
View Full Code Here

TOP

Related Classes of org.apache.http.HttpEntity

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.