Package net.yacy.cora.document

Examples of net.yacy.cora.document.MultiProtocolURI.toNormalform()


     * @throws IOException
     */
    public void GET(final String uri) throws IOException {
        if (this.currentRequest != null) throw new IOException("Client is in use!");
        final MultiProtocolURI url = new MultiProtocolURI(uri);
        final HttpGet httpGet = new HttpGet(url.toNormalform(true, false, true, false));
        setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
        this.currentRequest = httpGet;
        execute(httpGet);
    }

View Full Code Here


     * @return the HttpResponse
     * @throws IOException
     */
    public HttpResponse HEADResponse(final String uri) throws IOException {
        final MultiProtocolURI url = new MultiProtocolURI(uri);
        final HttpHead httpHead = new HttpHead(url.toNormalform(true, false, true, false));
        setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
      execute(httpHead);
      finish();
      ConnectionInfo.removeConnection(httpHead.hashCode());
      return this.httpResponse;
View Full Code Here

     * @throws IOException
     */
    public void POST(final String uri, final InputStream instream, final long length) throws IOException {
      if (this.currentRequest != null) throw new IOException("Client is in use!");
        final MultiProtocolURI url = new MultiProtocolURI(uri);
        final HttpPost httpPost = new HttpPost(url.toNormalform(true, false, true, false));
        String host = url.getHost();
        if (host == null) host = "127.0.0.1";
        setHost(host); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
        final NonClosingInputStreamEntity inputStreamEntity = new NonClosingInputStreamEntity(instream, length);
      // statistics
View Full Code Here

     * @return content bytes
     * @throws IOException
     */
    public byte[] POSTbytes(final String uri, final InputStream instream, final long length) throws IOException {
        final MultiProtocolURI url = new MultiProtocolURI(uri);
        final HttpPost httpPost = new HttpPost(url.toNormalform(true, false, true, false));
        String host = url.getHost();
        if (host == null) host = "127.0.0.1";
        setHost(host); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service

        final InputStreamEntity inputStreamEntity = new InputStreamEntity(instream, length);
View Full Code Here

                // check for interruption
                checkInterruption();

                // process the next hyperlink
                nextUrl = nextEntry.getKey();
                final String u = nextUrl.toNormalform(true, true, false, true);
                if (!(u.startsWith("http://") || u.startsWith("https://") || u.startsWith("ftp://") || u.startsWith("smb://") || u.startsWith("file://"))) continue;
                // enqueue the hyperlink into the pre-notice-url db
                try {
                    this.crawlStacker.enqueueEntry(new Request(
                            response.initiator(),
View Full Code Here

                      (thishost.startsWith("www.") && url.getHost().endsWith(thishost.substring(4)))))) {
                    this.inboundlinks.put(url, "anchor" + (noindex ? " noindex" : "") + (nofollow ? " nofollow" : ""));
                } else {
                    this.outboundlinks.put(url, "anchor" + (noindex ? " noindex" : "") + (nofollow ? " nofollow" : ""));
                }
                u = url.toNormalform(true, false);
                final String name = entry.getValue().getProperty("name", "");
                if (u.startsWith("mailto:")) {
                    this.emaillinks.put(u.substring(7), name);
                } else {
                    extpos = u.lastIndexOf('.');
View Full Code Here

                else if (o instanceof ImageEntry) url = ((ImageEntry) o).url();
                else {
                    assert false;
                    continue;
                }
                u = url.toNormalform(true, true);
                if (u.endsWith("/"))
                    u = u.substring(0, u.length() - 1);
                pos = u.lastIndexOf('/');
                while (pos > 8) {
                    l = u.length();
View Full Code Here

                else {
                    assert false;
                    continue loop;
                }
                if (url == null) continue loop;
                u = url.toNormalform(true, true);
                if ((pos = u.toLowerCase().indexOf("http://", 7)) > 0) {
                    i.remove();
                    u = u.substring(pos);
                    while ((pos = u.toLowerCase().indexOf("http://", 7)) > 0)
                        u = u.substring(pos);
View Full Code Here

            MultiProtocolURI url;
            while (j.hasNext()) {
                ientry = j.next();
                url = ientry.url();
                if (url == null) continue;
                insertTextToWords(url.toNormalform(false, false), 99, flag_cat_hasimage, this.RESULT_FLAGS, false, meaningLib);
                insertTextToWords(ientry.alt(), 99, flag_cat_hasimage, this.RESULT_FLAGS, true, meaningLib);
            }

            // finally check all words for missing flag entry
            final Iterator<Map.Entry<String, Word>> k = this.words.entrySet().iterator();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.