Package net.yacy.cora.document

Examples of net.yacy.cora.document.MultiProtocolURI


    idNames.add("phpsessionid");

    MultiProtocolURI.initSessionIDNames(idNames);

    for (int i=0; i<testURIs.length; i++) {
      MultiProtocolURI uri = new MultiProtocolURI(testURIs[i][0]);
   
         assertEquals(uri.toNormalform(true, true, false, true), testURIs[i][1]);
    }
  }
View Full Code Here


     */
    public MultiProtocolURI getSitemap() {
        final String url = this.mem.containsKey(SITEMAP)? UTF8.String(this.mem.get(SITEMAP)): null;
        if (url == null) return null;
        try {
            return new MultiProtocolURI(url);
        } catch (final MalformedURLException e) {
            return null;
        }
    }
View Full Code Here

                    System.currentTimeMillis() - robotsTxt4Host.getLoadedDate().getTime() <= 1*24*60*60*1000) {
                    return robotsTxt4Host;
                }

                // generating the proper url to download the robots txt
                MultiProtocolURI robotsURL = null;
                try {
                    robotsURL = new MultiProtocolURI("http://" + urlHostPort + "/robots.txt");
                } catch (final MalformedURLException e) {
                    log.fatal("Unable to generate robots.txt URL for host:port '" + urlHostPort + "'.", e);
                    robotsURL = null;
                }

View Full Code Here

                } else {

                    redirectionUrlString = redirectionUrlString.trim();

                    // generating the new URL object
                    final MultiProtocolURI redirectionUrl = MultiProtocolURI.newURL(robotsURL, redirectionUrlString);

                    // following the redirection
                    if (log.isDebugEnabled()) log.debug("Redirection detected for robots.txt with URL '" + robotsURL + "'." +
                            "\nRedirecting request to: " + redirectionUrl);
                    return downloadRobotsTxt(redirectionUrl,redirectionCount,entry);
View Full Code Here

    }

    public final static void main(final String[] args) throws Exception {

        final String url = "http://www.badelatschen.net/robots.txt";
        final Object[] o = downloadRobotsTxt(new MultiProtocolURI(url), 0, null);
        if (o == null) {
            System.out.println("result: null");
        } else {
            System.out.println("not allowed = " + ((Boolean) o[0]).toString());
            System.out.println("robots = " + ((o[1] == null) ? "null" : UTF8.String((byte[]) o[1])));
View Full Code Here

            final int startRecord,
            final int maximumRecords,
            final CacheStrategy cacheStrategy,
            final boolean global,
            final String userAgent) throws IOException {
        MultiProtocolURI uri = null;
        try {
            uri = new MultiProtocolURI(rssSearchServiceURL);
        } catch (final MalformedURLException e) {
            throw new IOException("cora.Search failed asking peer '" + rssSearchServiceURL + "': bad url, " + e.getMessage());
        }

        // send request
        byte[] result = new byte[0];
        try {
            final LinkedHashMap<String,ContentBody> parts = new LinkedHashMap<String,ContentBody>();
            parts.put("query", UTF8.StringBody(query));
            parts.put("startRecord", UTF8.StringBody(Integer.toString(startRecord)));
            parts.put("maximumRecords", UTF8.StringBody(Long.toString(maximumRecords)));
            parts.put("verify", cacheStrategy == null ? UTF8.StringBody("false") : UTF8.StringBody(cacheStrategy.toName()));
            parts.put("resource", UTF8.StringBody(global ? "global" : "local"));
            parts.put("nav", UTF8.StringBody("none"));
            // result = HTTPConnector.getConnector(userAgent == null ? MultiProtocolURI.yacybotUserAgent : userAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
            final HTTPClient httpClient = new HTTPClient(userAgent == null ? ClientIdentification.getUserAgent() : userAgent, (int) timeout);
            result = httpClient.POSTbytes(new MultiProtocolURI(rssSearchServiceURL), uri.getHost(), parts, false);

            final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
            if (reader == null) {
                throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (1), reader == null");
            }
            final RSSFeed feed = reader.getFeed();
            if (feed == null) {
                // case where the rss reader does not understand the content
                throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (2)");
            }
            return feed;
        } catch (final IOException e) {
            throw new IOException("cora.Search error asking peer '" + uri.getHost() + "':" + e.toString());
        }
    }
View Full Code Here

                urlEnd = contents.indexOf(linebreak,urlStart);
                url = contents.substring(urlStart,urlEnd);
                urlnr = Integer.toString(++urls).toString();
                final Properties p = new Properties();
                p.put("name", urlnr);
                anchors.put(new MultiProtocolURI(url), p);
                contents = contents.substring(0,urlStart)+contents.substring(urlEnd);
            }

           // As the result of parsing this function must return a plasmaParserDocument object
            return new Document[]{new Document(
View Full Code Here

     * @param maxBytes to get
     * @return content bytes
     * @throws IOException
     */
    public byte[] GETbytes(final String uri, final int maxBytes) throws IOException {
        return GETbytes(new MultiProtocolURI(uri), maxBytes);
    }
View Full Code Here

     * @param uri the url to get
     * @throws IOException
     */
    public void GET(final String uri) throws IOException {
        if (this.currentRequest != null) throw new IOException("Client is in use!");
        final MultiProtocolURI url = new MultiProtocolURI(uri);
        final HttpGet httpGet = new HttpGet(url.toNormalform(true, false, true, false));
        setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
        this.currentRequest = httpGet;
        execute(httpGet);
    }
View Full Code Here

     * @param uri the url to Response from
     * @return the HttpResponse
     * @throws IOException
     */
    public HttpResponse HEADResponse(final String uri) throws IOException {
        final MultiProtocolURI url = new MultiProtocolURI(uri);
        final HttpHead httpHead = new HttpHead(url.toNormalform(true, false, true, false));
        setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
      execute(httpHead);
      finish();
      ConnectionInfo.removeConnection(httpHead.hashCode());
      return this.httpResponse;
    }
View Full Code Here

TOP

Related Classes of net.yacy.cora.document.MultiProtocolURI

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.