Package net.yacy.cora.document

Examples of net.yacy.cora.document.MultiProtocolURI


     * @param length the content length
     * @throws IOException
     */
    public void POST(final String uri, final InputStream instream, final long length) throws IOException {
      if (this.currentRequest != null) throw new IOException("Client is in use!");
        final MultiProtocolURI url = new MultiProtocolURI(uri);
        final HttpPost httpPost = new HttpPost(url.toNormalform(true, false, true, false));
        String host = url.getHost();
        if (host == null) host = "127.0.0.1";
        setHost(host); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
        final NonClosingInputStreamEntity inputStreamEntity = new NonClosingInputStreamEntity(instream, length);
      // statistics
      this.upbytes = length;
View Full Code Here


     * @param parts to post
     * @return content bytes
     * @throws IOException
     */
    public byte[] POSTbytes(final String uri, final Map<String, ContentBody> parts, final boolean usegzip) throws IOException {
        final MultiProtocolURI url = new MultiProtocolURI(uri);
        return POSTbytes(url, url.getHost(), parts, usegzip);
    }
View Full Code Here

     * @param length the length of the stream
     * @return content bytes
     * @throws IOException
     */
    public byte[] POSTbytes(final String uri, final InputStream instream, final long length) throws IOException {
        final MultiProtocolURI url = new MultiProtocolURI(uri);
        final HttpPost httpPost = new HttpPost(url.toNormalform(true, false, true, false));
        String host = url.getHost();
        if (host == null) host = "127.0.0.1";
        setHost(host); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service

        final InputStreamEntity inputStreamEntity = new InputStreamEntity(instream, length);
        // statistics
View Full Code Here

        int importCount = 0;

        Map<MultiProtocolURI, Properties> links = new HashMap<MultiProtocolURI, Properties>();
        String title;
        MultiProtocolURI url;
        Bookmark bm;
        final Set<String> tags=ListManager.string2set(tag); //this allow multiple default tags
        try {
            //load the links
            final ContentScraper scraper = new ContentScraper(baseURL);
            //OutputStream os = new htmlFilterOutputStream(null, scraper, null, false);
            final Writer writer= new TransformerWriter(null,null,scraper, null, false);
            FileUtils.copy(input,writer);
            writer.close();
            links = scraper.getAnchors();
        } catch (final IOException e) { Log.logWarning("BOOKMARKS", "error during load of links: "+ e.getClass() +" "+ e.getMessage());}
        for (final Entry<MultiProtocolURI, Properties> link: links.entrySet()) {
            url = link.getKey();
            title = link.getValue().getProperty("name", "");
            Log.logInfo("BOOKMARKS", "links.get(url)");
            if ("".equals(title)) {//cannot be displayed
                title = url.toString();
            }
            bm = db.new Bookmark(new DigestURI(url));
            bm.setProperty(Bookmark.BOOKMARK_TITLE, title);
            bm.setTags(tags);
            bm.setPublic(importPublic);
View Full Code Here

        }
       
        final RSSFeed feed = rssReader.getFeed();
        //RSSMessage channel = feed.getChannel();
        final List<Document> docs = new ArrayList<Document>();
        MultiProtocolURI uri;
        Set<String> languages;
        Map<MultiProtocolURI, Properties> anchors;
        Document doc;
        for (final Hit item: feed) try {
            uri = new MultiProtocolURI(item.getLink());
            languages = new HashSet<String>();
            languages.add(item.getLanguage());
            anchors = new HashMap<MultiProtocolURI, Properties>();
            Properties p = new Properties();
            p.put("name", item.getTitle());
View Full Code Here

                final int idx = name.lastIndexOf('.');
                final String mime = TextParser.mimeOf((idx >= 0) ? name.substring(idx + 1) : "");
                try {
                    tmp = FileUtils.createTempFile(this.getClass(), name);
                    FileUtils.copy(zis, tmp, entry.getSize());
                    final MultiProtocolURI virtualURL = MultiProtocolURI.newURL(url, "#" + name);
                    //this.log.logInfo("ZIP file parser: " + virtualURL.toNormalform(false, false));
                    docs = TextParser.parseSource(virtualURL, mime, null, tmp, false);
                    if (docs == null) continue;
                    for (final Document d: docs) docacc.add(d);
                } catch (final Parser.Failure e) {
View Full Code Here

                        }
                        parsedDataText.append("\r\n");
                        parsedData.clear();
                    } else if (key.toUpperCase().startsWith("URL")) {
                        try {
                            final MultiProtocolURI newURL = new MultiProtocolURI(value);
                            final Properties p = new Properties();
                            p.put("name", newURL.toString());
                            anchors.put(newURL, p);
                            //parsedData.put(key,value);
                        } catch (final MalformedURLException ex) {/* ignore this */}
                    } else if (
                            !key.equalsIgnoreCase("BEGIN") &&
View Full Code Here

            final String[] imgprots = new String[imagesc.size()];
            final String[] imgstubs = new String[imagesc.size()];
            final String[] imgalts  = new String[imagesc.size()];
            c = 0;
            for (final ImageEntry ie: imagesc) {
                final MultiProtocolURI uri = ie.url();
                imgtags[c] = ie.toString();
                imgprots[c] = uri.getProtocol();
                imgstubs[c] = uri.toString().substring(imgprots[c].length() + 3);
                imgalts[c] = ie.alt();
                c++;
            }
            addSolr(solrdoc, "imagescount_i", imgtags.length);
            if (isEmpty() || contains("attr_images_tag")) addSolr(solrdoc, "attr_images_tag", imgtags);
View Full Code Here

        for (int i = 0; i < transmissionQueueCount; i++) {
            this.transmissionQueue[i] = new ArrayBlockingQueue<SolrInputDocument>(transmissionQueueSize);
        }

        // connect using authentication
        final MultiProtocolURI u = new MultiProtocolURI(this.solrurl);
        this.host = u.getHost();
        this.port = u.getPort();
        this.solrpath = u.getPath();
        final String userinfo = u.getUserInfo();
        if (userinfo == null || userinfo.length() == 0) {
            this.solraccount = ""; this.solrpw = "";
        } else {
            final int p = userinfo.indexOf(':');
            if (p < 0) {
View Full Code Here

            // add all images also to the crawl stack
            hl.putAll(Document.getImagelinks(documents));

            // insert those hyperlinks to the crawler
            MultiProtocolURI nextUrl;
            for (final Map.Entry<MultiProtocolURI, String> nextEntry : hl.entrySet()) {
                // check for interruption
                checkInterruption();

                // process the next hyperlink
                nextUrl = nextEntry.getKey();
                final String u = nextUrl.toNormalform(true, true, false, true);
                if (!(u.startsWith("http://") || u.startsWith("https://") || u.startsWith("ftp://") || u.startsWith("smb://") || u.startsWith("file://"))) continue;
                // enqueue the hyperlink into the pre-notice-url db
                try {
                    this.crawlStacker.enqueueEntry(new Request(
                            response.initiator(),
View Full Code Here

TOP

Related Classes of net.yacy.cora.document.MultiProtocolURI

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.