Examples of URIMetadataRow


Examples of net.yacy.kelondro.data.meta.URIMetadataRow

            prop.put("searchresult", 3);
            prop.put("genUrlList_flags", (flags == null) ? "" : flags.exportB64());
            prop.put("genUrlList_lines", maxlines);
            int i = 0;
            DigestURI url;
            URIMetadataRow entry;
            String us;
            long rn = -1;
            while (!ranked.isEmpty() && (entry = ranked.takeURL(false, 1000)) != null) {
                if ((entry == null) || (entry.metadata() == null)) continue;
                url = entry.metadata().url();
                if (url == null) continue;
                us = url.toNormalform(false, false);
                if (rn == -1) rn = entry.ranking();
                prop.put("genUrlList_urlList_"+i+"_urlExists", "1");
                prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i);
                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", entry.word().urlhash());
                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_keyString", keystring);
                prop.put("genUrlList_urlList_"+i+"_urlExists_keyHash", keyhashs);
                prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlString", us);
                prop.put("genUrlList_urlList_"+i+"_urlExists_urlStringShort", (us.length() > 40) ? (us.substring(0, 20) + "<br>" + us.substring(2040) + "...") : ((us.length() > 30) ? (us.substring(0, 20) + "<br>" + us.substring(20)) : us));
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_ranking", (entry.ranking() - rn));
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash()));
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", BlockRank.ranking(entry.hash()));
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(ASCII.String(entry.hash(), 6, 6)));
                prop.put("genUrlList_urlList_"+i+"_urlExists_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(entry.word().lastModified())));
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_phrasesintext", entry.word().phrasesintext());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_llocal", entry.word().llocal());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_lother", entry.word().lother());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_hitcount", entry.word().hitcount());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_worddistance", 0);
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", BlockRank.ranking(entry.hash()));
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_pos", entry.word().minposition());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_phrase", entry.word().posofphrase());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_posinphrase", entry.word().posinphrase());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_urlcomps", entry.word().urlcomps());
                prop.putNum("genUrlList_urlList_"+i+"_urlExists_urllength", entry.word().urllength());
                prop.put("genUrlList_urlList_"+i+"_urlExists_props",
                        ((entry.word().flags().get(Condenser.flag_cat_indexof)) ? "appears on index page, " : "") +
                        ((entry.word().flags().get(Condenser.flag_cat_hasimage)) ? "contains images, " : "") +
                        ((entry.word().flags().get(Condenser.flag_cat_hasaudio)) ? "contains audio, " : "") +
                        ((entry.word().flags().get(Condenser.flag_cat_hasvideo)) ? "contains video, " : "") +
                        ((entry.word().flags().get(Condenser.flag_cat_hasapp)) ? "contains applications, " : "") +
                        ((entry.word().flags().get(WordReferenceRow.flag_app_dc_identifier)) ? "appears in url, " : "") +
                        ((entry.word().flags().get(WordReferenceRow.flag_app_dc_title)) ? "appears in title, " : "") +
                        ((entry.word().flags().get(WordReferenceRow.flag_app_dc_creator)) ? "appears in author, " : "") +
                        ((entry.word().flags().get(WordReferenceRow.flag_app_dc_subject)) ? "appears in subject, " : "") +
                        ((entry.word().flags().get(WordReferenceRow.flag_app_dc_description)) ? "appears in description, " : "") +
                        ((entry.word().flags().get(WordReferenceRow.flag_app_emphasized)) ? "appears emphasized, " : "") +
                        ((DigestURI.probablyRootURL(entry.word().urlhash())) ? "probably root url" : "")
                );
                if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, url)) {
                    prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxChecked", "1");
                }
                i++;
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

                if (references.containsKey(e.urlhash())) continue;
                if (badReferences.has(e.urlhash())) {
                    notFoundx.add(e.urlhash());
                    continue;
                }
                URIMetadataRow r = segment.urlMetadata().load(e.urlhash());
                if (r == null) {
                    notFoundx.add(e.urlhash());
                    badReferences.put(e.urlhash());
                } else {
                    references.put(e.urlhash(), r);
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

            this.links = new ArrayList<URIMetadataRow>(this.urlcount);
            for (int n = 0; n < this.urlcount; n++) {
                // get one single search result
                final String resultLine = resultMap.get("resource" + n);
                if (resultLine == null) continue;
                final URIMetadataRow urlEntry = URIMetadataRow.importEntry(resultLine);
                if (urlEntry == null) continue;
                this.links.add(urlEntry);
            }
        }
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

                        if (format == 1) pw.println("<a href=\"http://" + host + "\">" + host + "</a><br>");
                        count++;
                    }
                } else {
                    final Iterator<URIMetadataRow> i = entries(); // iterates indexURLEntry objects
                    URIMetadataRow entry;
                    URIMetadataRow.Components metadata;
                    String url;
                    while (i.hasNext()) {
                        entry = i.next();
                        if (this.set != null && !set.has(entry.hash())) continue;
                        metadata = entry.metadata();
                        url = metadata.url().toNormalform(true, false);
                        if (!url.matches(filter)) continue;
                        if (format == 0) {
                            pw.println(url);
                        }
                        if (format == 1) {
                            pw.println("<a href=\"" + url + "\">" + CharacterCoding.unicode2xml(metadata.dc_title(), true) + "</a><br>");
                        }
                        if (format == 2) {
                            pw.println("<item>");
                            pw.println("<title>" + CharacterCoding.unicode2xml(metadata.dc_title(), true) + "</title>");
                            pw.println("<link>" + MultiProtocolURI.escape(url) + "</link>");
                            if (metadata.dc_creator().length() > 0) pw.println("<author>" + CharacterCoding.unicode2xml(metadata.dc_creator(), true) + "</author>");
                            if (metadata.dc_subject().length() > 0) pw.println("<description>" + CharacterCoding.unicode2xml(metadata.dc_subject(), true) + "</description>");
                            pw.println("<pubDate>" + entry.moddate().toString() + "</pubDate>");
                            pw.println("<yacy:size>" + entry.size() + "</yacy:size>");
                            pw.println("<guid isPermaLink=\"false\">" + ASCII.String(entry.hash()) + "</guid>");
                            pw.println("</item>");
                        }
                        count++;
                    }
                }
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

        ReferenceOrder order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang));
        RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation);
        rankedCache.start();
       
        // search is running; retrieve results
        URIMetadataRow row;
        ArrayList<DigestURI> files = new ArrayList<DigestURI>();
        Components metadata;
        while ((row = rankedCache.takeURL(false, 1000)) != null) {
            metadata = row.metadata();
            if (metadata == null) continue;
            files.add(metadata.url());
            count--;
            if (count == 0) break;
        }
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

        }
       
        @Override
        public void run() {
            DigestURI f;
            URIMetadataRow resultRow;
            try {
                while ((f = queue.take()) != poison) try {
                    resultRow = add(f);
                    if (callback != null) {
                        if (resultRow == null) {
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

      long timeleft;
      while ((timeleft = timeout - System.currentTimeMillis()) > 0) {
          //System.out.println("timeleft = " + timeleft);
            final WeakPriorityBlockingQueue.Element<WordReferenceVars> obrwi = takeRWI(skipDoubleDom, timeleft);
            if (obrwi == null) return null; // all time was already wasted in takeRWI to get another element
            final URIMetadataRow page = this.query.getSegment().urlMetadata().load(obrwi);
            if (page == null) {
              this.misses.add(obrwi.getElement().urlhash());
              continue;
            }

            // prepare values for constraint check
            final URIMetadataRow.Components metadata = page.metadata();

            // check errors
            if (metadata == null) {
                this.sortout++;
                continue; // rare case where the url is corrupted
            }

            if (!this.query.urlMask_isCatchall) {
                // check url mask
                if (!metadata.matches(this.query.urlMask)) {
                    this.sortout++;
                    continue;
                }

                // in case we do not have a catch-all filter for urls,
                // we must also construct the domain navigator here
                //if (query.sitehash == null) {
                //    this.hostNavigator.inc(UTF8.String(urlhash, 6, 6));
                //    this.hostResolver.put(UTF8.String(urlhash, 6, 6), UTF8.String(urlhash));
                //}
            }

            // check for more errors
            if (metadata.url() == null) {
                this.sortout++;
                continue; // rare case where the url is corrupted
            }

            final String pageurl = metadata.url().toNormalform(true, true);
            final String pageauthor = metadata.dc_creator();
            final String pagetitle = metadata.dc_title().toLowerCase();

            // check exclusion
            if ((QueryParams.anymatch(pagetitle, this.query.excludeHashes)) ||
                (QueryParams.anymatch(pageurl.toLowerCase(), this.query.excludeHashes)) ||
                (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.excludeHashes))) {
                this.sortout++;
                continue;
            }

            // check index-of constraint
            if ((this.query.constraint != null) &&
                (this.query.constraint.get(Condenser.flag_cat_indexof)) &&
                (!(pagetitle.startsWith("index of")))) {
                final Iterator<byte[]> wi = this.query.queryHashes.iterator();
                while (wi.hasNext()) {
                    this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash());
                }
                this.sortout++;
                continue;
            }

            // check location constraint
            if ((this.query.constraint != null) &&
                (this.query.constraint.get(Condenser.flag_cat_haslocation)) &&
                (metadata.lat() == 0.0f || metadata.lon() == 0.0f)) {
                this.sortout++;
                continue;
            }

            // check content domain
            if ((this.query.contentdom == ContentDomain.AUDIO && page.laudio() == 0) ||
                (this.query.contentdom == ContentDomain.VIDEO && page.lvideo() == 0) ||
                (this.query.contentdom == ContentDomain.IMAGE && page.limage() == 0) ||
                (this.query.contentdom == ContentDomain.APP && page.lapp() == 0)) {
                this.sortout++;
              continue;
            }

            // evaluate information of metadata for navigation
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

    public ScoreMap<String> getHostNavigator() {
        final ScoreMap<String> result = new ConcurrentScoreMap<String>();
        if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return result;

        final Iterator<String> domhashs = this.hostNavigator.keys(false);
        URIMetadataRow row;
        byte[] urlhash;
        String hosthash, hostname;
        if (this.hostResolver != null) while (domhashs.hasNext() && result.sizeSmaller(30)) {
            hosthash = domhashs.next();
            if (hosthash == null) continue;
            urlhash = this.hostResolver.get(hosthash);
            row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(urlhash);
            hostname = row == null ? null : row.metadata().url().getHost();
            if (hostname != null) {
                result.set(hostname, this.hostNavigator.get(hosthash));
            }
        }
        if (result.sizeSmaller(2)) result.clear(); // navigators with one entry are not useful
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

        byte[] urlHash = obrwi.getElement().urlhash();
        if (urlHash == null) return null;
        try {
            final Row.Entry entry = urlIndexFile.get(urlHash);
            if (entry == null) return null;
            return new URIMetadataRow(entry, obrwi.getElement(), obrwi.getWeight());
        } catch (final IOException e) {
            return null;
        }
    }
View Full Code Here

Examples of net.yacy.kelondro.data.meta.URIMetadataRow

        if (urlIndexFile == null) return null;
        if (urlHash == null) return null;
        try {
            final Row.Entry entry = urlIndexFile.get(urlHash);
            if (entry == null) return null;
            return new URIMetadataRow(entry, null, 0);
        } catch (final IOException e) {
            return null;
        }
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.