Package net.yacy.cora.document

Examples of net.yacy.cora.document.MultiProtocolURI


                    Log.logInfo("heuristicRSS", "rss result not parsed from " + feedName);
                    return;
                }

                final Map<MultiProtocolURI, String> links = new TreeMap<MultiProtocolURI, String>();
                MultiProtocolURI uri;
                for (final RSSMessage message: rss.getFeed()) try {
                    uri = new MultiProtocolURI(message.getLink());
                    links.put(uri, message.getTitle());
                } catch (final MalformedURLException e) {
                }

                Log.logInfo("heuristicRSS", "Heuristic: adding " + links.size() + " links from '" + feedName + "' rss feed");
View Full Code Here


    private void resortLinks() {
        if (this.resorted) return;
        synchronized (this) {
            if (this.resorted) return;
            // extract hyperlinks, medialinks and emaillinks from anchorlinks
            MultiProtocolURI url;
            String u;
            int extpos, qpos;
            String ext = null;
            final String thishost = this.source.getHost();
            this.inboundlinks = new HashMap<MultiProtocolURI, String>();
            this.outboundlinks = new HashMap<MultiProtocolURI, String>();
            this.hyperlinks = new HashMap<MultiProtocolURI, String>();
            this.videolinks = new HashMap<MultiProtocolURI, String>();
            this.audiolinks = new HashMap<MultiProtocolURI, String>();
            this.applinks   = new HashMap<MultiProtocolURI, String>();
            this.emaillinks = new HashMap<String, String>();
            final Map<MultiProtocolURI, ImageEntry> collectedImages = new HashMap<MultiProtocolURI, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
            for (final Map.Entry<MultiProtocolURI, ImageEntry> entry: collectedImages.entrySet()) {
                if (entry.getKey().getHost().equals(thishost)) this.inboundlinks.put(entry.getKey(), "image"); else this.outboundlinks.put(entry.getKey(), "image");
            }
            for (final Map.Entry<MultiProtocolURI, Properties> entry: this.anchors.entrySet()) {
                url = entry.getKey();
                if (url == null) continue;
                final boolean noindex = entry.getValue().getProperty("rel", "").toLowerCase().indexOf("noindex",0) >= 0;
                final boolean nofollow = entry.getValue().getProperty("rel", "").toLowerCase().indexOf("nofollow",0) >= 0;
                if ((thishost == null && url.getHost() == null) ||
                    ((thishost != null && url.getHost() != null) &&
                     (url.getHost().endsWith(thishost) ||
                      (thishost.startsWith("www.") && url.getHost().endsWith(thishost.substring(4)))))) {
                    this.inboundlinks.put(url, "anchor" + (noindex ? " noindex" : "") + (nofollow ? " nofollow" : ""));
                } else {
                    this.outboundlinks.put(url, "anchor" + (noindex ? " noindex" : "") + (nofollow ? " nofollow" : ""));
                }
                u = url.toNormalform(true, false);
                final String name = entry.getValue().getProperty("name", "");
                if (u.startsWith("mailto:")) {
                    this.emaillinks.put(u.substring(7), name);
                } else {
                    extpos = u.lastIndexOf('.');
View Full Code Here

        // links is either a Set of Strings (urls) or a Set of
        // htmlFilterImageEntries
        final Set<String> h = new HashSet<String>();
        Iterator<?> i = links.iterator();
        Object o;
        MultiProtocolURI url;
        String u;
        int pos;
        int l;
        while (i.hasNext())
            try {
                o = i.next();
                if (o instanceof MultiProtocolURI) url = (MultiProtocolURI) o;
                else if (o instanceof String) url = new MultiProtocolURI((String) o);
                else if (o instanceof ImageEntry) url = ((ImageEntry) o).url();
                else {
                    assert false;
                    continue;
                }
                u = url.toNormalform(true, true);
                if (u.endsWith("/"))
                    u = u.substring(0, u.length() - 1);
                pos = u.lastIndexOf('/');
                while (pos > 8) {
                    l = u.length();
                    u = u.substring(0, pos + 1);
                    h.add(u);
                    u = u.substring(0, pos);
                    assert (u.length() < l) : "u = " + u;
                    pos = u.lastIndexOf('/');
                }
            } catch (final MalformedURLException e) { }
        // now convert the strings to yacyURLs
        i = h.iterator();
        final Map<MultiProtocolURI, String> v = new HashMap<MultiProtocolURI, String>();
        while (i.hasNext()) {
            u = (String) i.next();
            try {
                url = new MultiProtocolURI(u);
                v.put(url, "sub");
            } catch (final MalformedURLException e) {
            }
        }
        return v;
View Full Code Here

        // htmlFilterImageEntries
        // we find all links that are part of a reference inside a url
        final Map<MultiProtocolURI, String> v = new HashMap<MultiProtocolURI, String>();
        final Iterator<?> i = links.iterator();
        Object o;
        MultiProtocolURI url = null;
        String u;
        int pos;
        loop: while (i.hasNext())
            try {
                o = i.next();
                if (o instanceof MultiProtocolURI)
                    url = (MultiProtocolURI) o;
                else if (o instanceof String)
                    url = new MultiProtocolURI((String) o);
                else if (o instanceof ImageEntry)
                    url = ((ImageEntry) o).url();
                else {
                    assert false;
                    continue loop;
                }
                if (url == null) continue loop;
                u = url.toNormalform(true, true);
                if ((pos = u.toLowerCase().indexOf("http://", 7)) > 0) {
                    i.remove();
                    u = u.substring(pos);
                    while ((pos = u.toLowerCase().indexOf("http://", 7)) > 0)
                        u = u.substring(pos);
                    url = new MultiProtocolURI(u);
                    if (!(v.containsKey(url)))
                        v.put(url, "ref");
                    continue loop;
                }
                if ((pos = u.toLowerCase().indexOf("/www.", 7)) > 0) {
                    i.remove();
                    u = "http:/" + u.substring(pos);
                    while ((pos = u.toLowerCase().indexOf("/www.", 7)) > 0)
                        u = "http:/" + u.substring(pos);
                    url = new MultiProtocolURI(u);
                    if (!(v.containsKey(url)))
                        v.put(url, "ref");
                    continue loop;
                }
            } catch (final MalformedURLException e) {
View Full Code Here

            }
            //this.hostname = Domains.getHostName(this.inetAddress);
            return this.hostname;
        }
        public MultiProtocolURI url() throws MalformedURLException {
            return new MultiProtocolURI(this.protocol.name() + "://" + getHostName() + "/");
        }
View Full Code Here

            return new MultiProtocolURI(this.protocol.name() + "://" + getHostName() + "/");
        }
        @Override
        public String toString() {
            try {
                return new MultiProtocolURI(this.protocol.name() + "://" + this.inetAddress.getHostAddress() + "/").toNormalform(true, false);
            } catch (final MalformedURLException e) {
                return "";
            }
        }
View Full Code Here

                                access = Access.denied;
                            }
                        }
                        if (this.service.getProtocol() == Protocol.smb) {
                            try {
                                final MultiProtocolURI uri = new MultiProtocolURI(this.service.toString());
                                final String[] list = uri.list();
                                access = list == null || list.length == 0 ? Access.empty : Access.granted;
                            } catch (final IOException e) {
                                access = Access.denied;
                            }
                        }
View Full Code Here

            }

            // images
            final Iterator<ImageEntry> j = document.getImages().values().iterator();
            ImageEntry ientry;
            MultiProtocolURI url;
            while (j.hasNext()) {
                ientry = j.next();
                url = ientry.url();
                if (url == null) continue;
                insertTextToWords(url.toNormalform(false, false), 99, flag_cat_hasimage, this.RESULT_FLAGS, false, meaningLib);
                insertTextToWords(ientry.alt(), 99, flag_cat_hasimage, this.RESULT_FLAGS, true, meaningLib);
            }

            // finally check all words for missing flag entry
            final Iterator<Map.Entry<String, Word>> k = this.words.entrySet().iterator();
View Full Code Here

        if (url.isLocal()) return; // we do this only for global urls
        final Map<MultiProtocolURI, String> hl = document.getHyperlinks();
        final Iterator<MultiProtocolURI> it = hl.keySet().iterator();
        final HashSet<MultiProtocolURI> globalRefURLs = new HashSet<MultiProtocolURI>();
        final String refhost = url.getHost();
        MultiProtocolURI u;
        int maxref = 1000;
        while (it.hasNext() && maxref-- > 0) {
            u = it.next();
            if (u == null) continue;
            if (refhost != null && u.getHost() != null && !u.getHost().equals(refhost)) {
                // this is a global link
                globalRefURLs.add(u);
            }
        }
        final leanrefObject lro = new leanrefObject(url, globalRefURLs);
View Full Code Here

    }
    if( this._baseURL.indexOf( '?' ) >=0 ) {
      throw new RuntimeException( "Invalid base url for solrj.  The base URL must not contain parameters: "+this._baseURL );
    }

    MultiProtocolURI u;
    try {
        u = new MultiProtocolURI(this._baseURL.toString());
        this.host = u.getHost();
        this.port = u.getPort();
        final String userinfo = u.getUserInfo();
        if (userinfo == null || userinfo.length() == 0) {
            this.solraccount = ""; this.solrpw = "";
        } else {
            final int p = userinfo.indexOf(':');
            if (p < 0) {
View Full Code Here

TOP

Related Classes of net.yacy.cora.document.MultiProtocolURI

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.