Examples of URIMetadataRow


Examples of net.yacy.kelondro.data.meta.URIMetadataRow

            prop.put("lurlexport", 0);
            prop.put("reload", 0);
        }

        if (post.containsKey("urlhashdelete")) {
            final URIMetadataRow entry = segment.urlMetadata().load(ASCII.getBytes(urlhash));
            if (entry == null) {
                prop.putHTML("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
            } else {
                urlstring = entry.metadata().url().toNormalform(false, true);
                prop.put("urlstring", "");
                sb.urlRemove(segment, urlhash.getBytes());
                prop.putHTML("result", "Removed URL " + urlstring);
            }
            prop.put("lurlexport", 0);
            prop.put("reload", 0);
        }

        if (post.containsKey("urldelete")) {
            try {
                urlhash = ASCII.String((new DigestURI(urlstring)).hash());
            } catch (final MalformedURLException e) {
                urlhash = null;
            }
            if ((urlhash == null) || (urlstring == null)) {
                prop.put("result", "No input given; nothing deleted.");
            } else {
                sb.urlRemove(segment, urlhash.getBytes());
                prop.putHTML("result", "Removed URL " + urlstring);
            }
            prop.put("lurlexport", 0);
            prop.put("reload", 0);
        }

        if (post.containsKey("urlstringsearch")) {
            try {
                final DigestURI url = new DigestURI(urlstring);
                urlhash = ASCII.String(url.hash());
                prop.put("urlhash", urlhash);
                final URIMetadataRow entry = segment.urlMetadata().load(ASCII.getBytes(urlhash));
                if (entry == null) {
                    prop.putHTML("result", "No Entry for URL " + url.toNormalform(true, true));
                    prop.putHTML("urlstring", urlstring);
                    prop.put("urlhash", "");
                } else {
                    prop.putAll(genUrlProfile(segment, entry, urlhash));
                    prop.put("statistics", 0);
                }
            } catch (final MalformedURLException e) {
                prop.putHTML("result", "bad url: " + urlstring);
                prop.put("urlhash", "");
            }
            prop.put("lurlexport", 0);
            prop.put("reload", 0);
        }

        if (post.containsKey("urlhashsearch")) {
            final URIMetadataRow entry = segment.urlMetadata().load(ASCII.getBytes(urlhash));
            if (entry == null) {
                prop.putHTML("result", "No Entry for URL hash " + urlhash);
            } else {
                prop.putHTML("urlstring", entry.metadata().url().toNormalform(false, true));
                prop.putAll(genUrlProfile(segment, entry, urlhash));
                prop.put("statistics", 0);
            }
            prop.put("lurlexport", 0);
            prop.put("reload", 0);
        }

        // generate list
        if (post.containsKey("urlhashsimilar")) {
            try {
                final Iterator<URIMetadataRow> entryIt = new RotateIterator<URIMetadataRow>(segment.urlMetadata().entries(true, urlhash), ASCII.String(Base64Order.zero((urlhash == null ? 0 : urlhash.length()))), segment.termIndex().sizesMax());
                final StringBuilder result = new StringBuilder("Sequential List of URL-Hashes:<br />");
                URIMetadataRow entry;
                i = 0;
                int rows = 0, cols = 0;
                prop.put("urlhashsimilar", "1");
                while (entryIt.hasNext() && i < 256) {
                    entry = entryIt.next();
                    if (entry == null) break;
                    prop.put("urlhashsimilar_rows_"+rows+"_cols_"+cols+"_urlHash", ASCII.String(entry.hash()));
                    cols++;
                    if (cols==8) {
                        prop.put("urlhashsimilar_rows_"+rows+"_cols", cols);
                        cols = 0;
                        rows++;
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

            prop.put("genUrlProfile", "1");
            prop.put("genUrlProfile_urlhash", urlhash);
            return prop;
        }
        final URIMetadataRow.Components metadata = entry.metadata();
        final URIMetadataRow le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash());
        if (metadata == null || metadata.url() == null) {
            prop.put("genUrlProfile", "1");
            prop.put("genUrlProfile_urlhash", urlhash);
            return prop;
        }
        prop.put("genUrlProfile", "2");
        prop.putHTML("genUrlProfile_urlNormalform", metadata.url().toNormalform(false, true));
        prop.put("genUrlProfile_urlhash", urlhash);
        prop.put("genUrlProfile_urlDescr", metadata.dc_title());
        prop.put("genUrlProfile_moddate", entry.moddate().toString());
        prop.put("genUrlProfile_loaddate", entry.loaddate().toString());
        prop.put("genUrlProfile_referrer", (le == null) ? 0 : 1);
        prop.putHTML("genUrlProfile_referrer_url", (le == null) ? "<unknown>" : le.metadata().url().toNormalform(false, true));
        prop.put("genUrlProfile_referrer_hash", (le == null) ? "" : ASCII.String(le.hash()));
        prop.put("genUrlProfile_doctype", String.valueOf(entry.doctype()));
        prop.put("genUrlProfile_language", entry.language());
        prop.put("genUrlProfile_size", entry.size());
        prop.put("genUrlProfile_wordCount", entry.wordCount());
        return prop;
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

            int received = 0;
            int blocked = 0;
            final int sizeBefore = sb.indexSegments.urlMetadata(Segments.Process.DHTIN).size();
            // read the urls from the other properties and store
            String urls;
            URIMetadataRow lEntry;
            for (int i = 0; i < urlc; i++) {
                serverCore.checkInterruption();

                // read new lurl-entry
                urls = post.get("url" + i);
                if (urls == null) {
                    if (Network.log.isFine()) Network.log.logFine("transferURL: got null URL-string from peer " + otherPeerName);
                    blocked++;
                    continue;
                }

                // parse new lurl-entry
                lEntry = URIMetadataRow.importEntry(urls);
                if (lEntry == null) {
                    Network.log.logWarning("transferURL: received invalid URL (entry null) from peer " + otherPeerName + "\n\tURL Property: " + urls);
                    blocked++;
                    continue;
                }

                // check if entry is well-formed
                final URIMetadataRow.Components metadata = lEntry.metadata();
                if (metadata == null || metadata.url() == null) {
                    Network.log.logWarning("transferURL: received invalid URL from peer " + otherPeerName + "\n\tURL Property: " + urls);
                    blocked++;
                    continue;
                }

                // check whether entry is too old
                if (lEntry.freshdate().getTime() <= freshdate) {
                    if (Network.log.isFine()) Network.log.logFine("transerURL: received too old URL from peer " + otherPeerName + ": " + lEntry.freshdate());
                    blocked++;
                    continue;
                }

                // check if the entry is blacklisted
                if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()))) {
                    if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + metadata.url().toNormalform(false, true) + "' from peer " + otherPeerName);
                    lEntry = null;
                    blocked++;
                    continue;
                }

                // check if the entry is in our network domain
                final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(metadata.url());
                if (urlRejectReason != null) {
                    if (Network.log.isFine()) Network.log.logFine("transferURL: blocked URL '" + metadata.url() + "' (" + urlRejectReason + ") from peer " + otherPeerName);
                    lEntry = null;
                    blocked++;
                    continue;
                }

                // write entry to database
                if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.metadata().url().toNormalform(true, false));
                try {
                    sb.indexSegments.urlMetadata(Segments.Process.DHTIN).store(lEntry);
                    ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER);
                    if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + metadata.url().toNormalform(false, true) + "' from peer " + otherPeerName);
                    received++;
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

            // retrieve a list of urls from the LURL-db by a given list of url hashes
            final String urlhashes = post.get("hashes", "");
            if (urlhashes.length() % 12 != 0) return prop;
            final int count = urlhashes.length() / 12;
          int c = 0;
          URIMetadataRow entry;
          URIMetadataRow.Components metadata;
            DigestURI referrer;
            for (int i = 0; i < count; i++) {
                entry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
                if (entry == null) continue;
                // find referrer, if there is one
                referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerHash());
                // create RSS entry
                metadata = entry.metadata();
                prop.put("item_" + c + "_title", metadata.dc_title());
                prop.putXML("item_" + c + "_link", metadata.url().toNormalform(true, false));
                prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false));
                prop.putXML("item_" + c + "_description", metadata.dc_title());
                prop.put("item_" + c + "_author", metadata.dc_creator());
                prop.put("item_" + c + "_pubDate", GenericFormatter.SHORT_SECOND_FORMATTER.format(entry.moddate()));
                prop.put("item_" + c + "_guid", ASCII.String(entry.hash()));
                c++;
            }
            prop.put("item", c);
            prop.putXML("response", "ok");
        }
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

        int size = 0;
        boolean pre = false;

        // get the url hash from which the content should be loaded
        String urlHash = post.get("urlHash", "");
        URIMetadataRow urlEntry = null;
        // get the urlEntry that belongs to the url hash
        if (urlHash.length() > 0 && (urlEntry = indexSegment.urlMetadata().load(ASCII.getBytes(urlHash))) != null) {
            // get the url that belongs to the entry
            final URIMetadataRow.Components metadata = urlEntry.metadata();
            if ((metadata == null) || (metadata.url() == null)) {
                prop.put("error", "3");
                prop.put("viewMode", VIEW_MODE_NO_TEXT);
                return prop;
            }
            url = metadata.url();
            descr = metadata.dc_title();
            //urlEntry.wordCount();
            size = urlEntry.size();
            pre = urlEntry.flags().get(Condenser.flag_cat_indexof);
        }

        prop.put("error_inurldb", urlEntry == null ? 0 : 1);

        // alternatively, get the url simply from a url String
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

            prop.put("table_showURL",     (showURL) ? "1" : "0");

            boolean dark = true;
            String urlstr, urltxt;
            Seed initiatorSeed, executorSeed;
            URIMetadataRow urle;
            URIMetadataRow.Components metadata;

            int cnt = 0;
            final Iterator<Map.Entry<String, InitExecEntry>> i = ResultURLs.results(tabletype);
            Map.Entry<String, InitExecEntry> entry;
            while (i.hasNext()) {
                entry = i.next();
                try {
                    urle = sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(UTF8.getBytes(entry.getKey()));
                    if (urle == null) {
                        Log.logWarning("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey());
                        urlstr = null;
                        urltxt = null;
                        metadata = null;
                        continue;
                    }
                    metadata = urle.metadata();
                    urlstr = metadata.url().toNormalform(false, true);
                    urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL

                    initiatorSeed = entry.getValue() == null || entry.getValue().initiatorHash == null ? null : sb.peers.getConnected(ASCII.String(entry.getValue().initiatorHash));
                    executorSeed = entry.getValue() == null || entry.getValue().executorHash == null ? null : sb.peers.getConnected(ASCII.String(entry.getValue().executorHash));

                    prop.put("table_indexed_" + cnt + "_dark", (dark) ? "1" : "0");
                    prop.put("table_indexed_" + cnt + "_feedbackpage", "CrawlResults.html");
                    prop.put("table_indexed_" + cnt + "_tabletype", tabletype.getCode());
                    prop.put("table_indexed_" + cnt + "_urlhash", entry.getKey());

                    if (showInit) {
                        prop.put("table_indexed_" + cnt + "_showInit", "1");
                        prop.put("table_indexed_" + cnt + "_showInit_initiatorSeed", (initiatorSeed == null) ? "unknown" : initiatorSeed.getName());
                    } else
                        prop.put("table_indexed_" + cnt + "_showInit", "0");

                    if (showExec) {
                        prop.put("table_indexed_" + cnt + "_showExec", "1");
                        prop.put("table_indexed_" + cnt + "_showExec_executorSeed", (executorSeed == null) ? "unknown" : executorSeed.getName());
                    } else
                        prop.put("table_indexed_" + cnt + "_showExec", "0");

                    if (showDate && urle != null) {
                        prop.put("table_indexed_" + cnt + "_showDate", "1");
                        prop.put("table_indexed_" + cnt + "_showDate_modified", daydate(urle.moddate()));
                    } else
                        prop.put("table_indexed_" + cnt + "_showDate", "0");

                    if (showWords && urle != null) {
                        prop.put("table_indexed_" + cnt + "_showWords", "1");
                        prop.put("table_indexed_" + cnt + "_showWords_count", urle.wordCount());
                    } else
                        prop.put("table_indexed_" + cnt + "_showWords", "0");

                    if (showTitle) {
                        prop.put("table_indexed_" + cnt + "_showTitle", (showTitle) ? "1" : "0");
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

            return "post url not allowed";
        }

        // check if the url is double registered
        final String dbocc = this.nextQueue.urlExists(url.hash()); // returns the name of the queue if entry exists
        final URIMetadataRow oldEntry = this.indexSegment.urlMetadata().load(url.hash());
        if (oldEntry == null) {
            if (dbocc != null) {
                // do double-check
                if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' is double registered in '" + dbocc + "'.");
                if (dbocc.equals("errors")) {
                    final ZURL.Entry errorEntry = this.nextQueue.errorURL.get(url.hash());
                    return "double in: errors (" + errorEntry.anycause() + ")";
                } else {
                    return "double in: " + dbocc;
                }
            }
        } else {
            final boolean recrawl = profile.recrawlIfOlder() > oldEntry.loaddate().getTime();
            if (recrawl) {
                if (this.log.isInfo())
                    this.log.logInfo("RE-CRAWL of URL '" + urlstring + "': this url was crawled " +
                        ((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000 / 60 / 24) + " days ago.");
            } else {
                if (dbocc == null) {
                    return "double in: LURL-DB";
                } else {
                    if (this.log.isInfo()) this.log.logInfo("URL '" + urlstring + "' is double registered in '" + dbocc + "'. " + "Stack processing time:");
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

                    prop.put("mode_feed", "0");
                } else {
                    final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.getBookmark(urlHash);
                    if (bookmark == null) {
                        // try to get the bookmark from the LURL database
                        final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlHash));
                        if (urlentry != null) try {
                            final URIMetadataRow.Components metadata = urlentry.metadata();
                            final Document document = Document.mergeDocuments(metadata.url(), null, sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE));
                            prop.put("mode_edit", "0"); // create mode
                            prop.put("mode_url", metadata.url().toNormalform(false, true));
                            prop.putHTML("mode_title", metadata.dc_title());
                            prop.putHTML("mode_description", (document == null) ? metadata.dc_title(): document.dc_title());
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

                if (!sb.verifyAuthentication(header)) {
                    prop.put("AUTHENTICATE", "admin log-in"); // force log-in
                    return prop;
                }
                final String recommendHash = post.get("recommendref", ""); // urlhash
                final URIMetadataRow urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(recommendHash));
                if (urlentry != null) {
                    final URIMetadataRow.Components metadata = urlentry.metadata();
                    Document[] documents = null;
                    try {
                        documents = sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE);
                    } catch (final IOException e) {
                    } catch (final Parser.Failure e) {
                    }
                    if (documents != null) {
                        // create a news message
                        final Map<String, String> map = new HashMap<String, String>();
                        map.put("url", metadata.url().toNormalform(false, true).replace(',', '|'));
                        map.put("title", metadata.dc_title().replace(',', ' '));
                        map.put("description", documents[0].dc_title().replace(',', ' '));
                        map.put("author", documents[0].dc_creator());
                        map.put("tags", documents[0].dc_subject(' '));
                        sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), NewsPool.CATEGORY_SURFTIPP_ADD, map);
                        documents[0].close();
                    }
                }
            }

            // if a bookmarks-button was hit, create new bookmark entry
            if (post != null && post.containsKey("bookmarkref")) {
                if (!sb.verifyAuthentication(header)) {
                    prop.put("AUTHENTICATE", "admin log-in"); // force log-in
                    return prop;
                }
                final String bookmarkHash = post.get("bookmarkref", ""); // urlhash
                final URIMetadataRow urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(bookmarkHash));
                if (urlentry != null) {
                    final URIMetadataRow.Components metadata = urlentry.metadata();
                    try {
                        sb.tables.bookmarks.createBookmark(sb.loader, metadata.url(), YMarkTables.USER_ADMIN, true, "searchresult", "/search");
                    } catch (final Throwable e) {
                    }
                }
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

        @Override
        public void run() {

            // start fetching urls and snippets
            URIMetadataRow page;
            ResultEntry resultEntry;
            //final int fetchAhead = snippetMode == 0 ? 0 : 10;
            final boolean nav_topics = SnippetProcess.this.query.navigators.equals("all") || SnippetProcess.this.query.navigators.indexOf("topics",0) >= 0;
            try {
                //System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis()));
                int loops = 0;
                while (this.shallrun && System.currentTimeMillis() < this.timeout) {
                    this.lastLifeSign = System.currentTimeMillis();

                    if (MemoryControl.shortStatus()) {
                      break;
                    }

                    // check if we have enough
                    if (SnippetProcess.this.result.sizeAvailable() >= this.neededResults) {
                        //Log.logWarning("ResultFetcher", SnippetProcess.this.result.sizeAvailable() + " = result.sizeAvailable() >= this.neededResults = " + this.neededResults);
                        break;
                    }

                    // check if we can succeed if we try to take another url
                    if (SnippetProcess.this.rankingProcess.feedingIsFinished() && SnippetProcess.this.rankingProcess.sizeQueue() == 0) {
                        //Log.logWarning("ResultFetcher", "rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0");
                        break;
                    }

                    // get next entry
                    page = SnippetProcess.this.rankingProcess.takeURL(true, Math.min(100, this.timeout - System.currentTimeMillis()));
                    //if (page != null) Log.logInfo("ResultFetcher", "got one page: " + page.metadata().url().toNormalform(true, false));
                    //if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis());
                    if (page == null) {
                        //Log.logWarning("ResultFetcher", "page == null");
                        break; // no more available
                    }
                    if (SnippetProcess.this.query.filterfailurls && SnippetProcess.this.workTables.failURLsContains(page.hash())) continue;

                    // in case that we have an attached solr, we load also the solr document
                    String solrContent = null;
                    if (this.solr != null) {
                        SolrDocument sd = null;
                        final SolrDocumentList sdl = this.solr.get("id:" + ASCII.String(page.hash()), 0, 1);
                        if (sdl.size() > 0) sd = sdl.get(0);
                        if (sd != null) solrContent = this.solr.getScheme().solrGetText(sd);
                    }

                    loops++;
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.