Examples of HandleSet


Examples of net.yacy.kelondro.index.HandleSet

        public void run() {
            Log.logInfo("INDEXCLEANER", "IndexCleaner-Thread started");
            ReferenceContainer<WordReference> container = null;
            WordReferenceVars entry = null;
            DigestURI url = null;
            final HandleSet urlHashs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            try {
                Iterator<ReferenceContainer<WordReference>> indexContainerIterator = Segment.this.termIndex.references(this.startHash, false, 100, false).iterator();
                while (indexContainerIterator.hasNext() && this.run) {
                    waiter();
                    container = indexContainerIterator.next();
                    final Iterator<WordReference> containerIterator = container.entries();
                    this.wordHashNow = container.getTermHash();
                    while (containerIterator.hasNext() && this.run) {
                        waiter();
                        entry = new WordReferenceVars(containerIterator.next());
                        // System.out.println("Wordhash: "+wordHash+" UrlHash:
                        // "+entry.getUrlHash());
                        final URIMetadataRow ue = Segment.this.urlMetadata.load(entry.urlhash());
                        if (ue == null) {
                            urlHashs.put(entry.urlhash());
                        } else {
                            url = ue.metadata().url();
                            if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) {
                                urlHashs.put(entry.urlhash());
                            }
                        }
                    }
                    if (!urlHashs.isEmpty()) try {
                        final int removed = Segment.this.termIndex.remove(container.getTermHash(), urlHashs);
                        Log.logFine("INDEXCLEANER", ASCII.String(container.getTermHash()) + ": " + removed + " of " + container.size() + " URL-entries deleted");
                        this.lastWordHash = container.getTermHash();
                        this.lastDeletionCounter = urlHashs.size();
                        urlHashs.clear();
                    } catch (final IOException e) {
                        Log.logException(e);
                    }

                    if (!containerIterator.hasNext()) {
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

            // find all hashes that appear in the sentence
            final Map<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null);
            final Iterator<byte[]> j = queryhashes.iterator();
            Integer pos;
            int p, minpos = sentence.length(), maxpos = -1;
            final HandleSet remainingHashes = new HandleSet(queryhashes.row().primaryKeyLength, queryhashes.comparator(), 0);
            while (j.hasNext()) {
                hash = j.next();
                pos = hs.get(hash);
                if (pos == null) {
                    try {
                        remainingHashes.put(hash);
                    } catch (RowSpaceExceededException e) {
                        Log.logException(e);
                    }
                } else {
                    p = pos.intValue();
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

            init(url.hash(), null, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences");
            return;
        }
        final SnippetExtractor tsr;
        String textline = null;
        HandleSet remainingHashes = queryhashes;
        try {
            tsr = new SnippetExtractor(sentences, queryhashes, snippetMaxLength);
            textline = tsr.getSnippet();
            remainingHashes =  tsr.getRemainingWords();
        } catch (final UnsupportedOperationException e) {
            init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
            return;
        }

        // compute snippet from media
        //String audioline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
        //String videoline = computeMediaSnippet(document.getVideolinks(), queryhashes);
        //String appline = computeMediaSnippet(document.getApplinks(), queryhashes);
        //String hrefline = computeMediaSnippet(document.getAnchors(), queryhashes);
        //String imageline = computeMediaSnippet(document.getAudiolinks(), queryhashes);

        snippetLine = "";
        //if (audioline != null) line += (line.length() == 0) ? audioline : "<br />" + audioline;
        //if (videoline != null) line += (line.length() == 0) ? videoline : "<br />" + videoline;
        //if (appline   != null) line += (line.length() == 0) ? appline   : "<br />" + appline;
        //if (hrefline  != null) line += (line.length() == 0) ? hrefline  : "<br />" + hrefline;
        if (textline  != null) snippetLine += (snippetLine.length() == 0) ? textline  : "<br />" + textline;

        if (snippetLine == null || !remainingHashes.isEmpty()) {
            init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
            return;
        }
        if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength);
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        this.cachedUrlHashs = new HashMap<String, HandleSet>();

        for (final String blacklistType : BLACKLIST_TYPES) {
            this.hostpaths_matchable.put(blacklistType, new HashMap<String, List<String>>());
            this.hostpaths_notmatchable.put(blacklistType, new HashMap<String, List<String>>());
            this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
        }
    }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        }

        if (url.getHost() == null) {
            return false;
        }
        final HandleSet urlHashCache = getCacheUrlHashsSet(blacklistType);
        if (!urlHashCache.has(url.hash())) {
            final boolean temp = isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
            if (temp) {
                try {
                    urlHashCache.put(url.hash());
                } catch (RowSpaceExceededException e) {
                    Log.logException(e);
                }
            }
            return temp;
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        this.result = new WeakPriorityBlockingQueue<ResultEntry>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking
        this.images = new WeakPriorityBlockingQueue<MediaSnippet>(-1);

        // snippets do not need to match with the complete query hashes,
        // only with the query minus the stopwords which had not been used for the search
        HandleSet filtered;
        try {
            filtered = HandleSet.joinConstructive(query.queryHashes, Switchboard.stopwordHashes);
        } catch (final RowSpaceExceededException e) {
            Log.logException(e);
            filtered = new HandleSet(query.queryHashes.row().primaryKeyLength, query.queryHashes.comparator(), 0);
        }
        this.snippetFetchWordHashes = query.queryHashes.clone();
        if (filtered != null && !filtered.isEmpty()) {
            this.snippetFetchWordHashes.excludeDestructive(Switchboard.stopwordHashes);
        }

        // start worker threads to fetch urls and snippets
        this.workerThreads = null;
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

            tc = topwords.get(descrcomp);
            if (tc > 0) r += Math.max(1, tc) << this.query.ranking.coeff_descrcompintoplist;
        }

        // apply query-in-result matching
        final HandleSet urlcomph = Word.words2hashesHandles(urlcomps);
        final HandleSet descrcomph = Word.words2hashesHandles(descrcomps);
        final Iterator<byte[]> shi = this.query.queryHashes.iterator();
        byte[] queryhash;
        while (shi.hasNext()) {
            queryhash = shi.next();
            if (urlcomph.has(queryhash)) r += 256 << this.query.ranking.coeff_appurl;
            if (descrcomph.has(queryhash)) r += 256 << this.query.ranking.coeff_app_dc_title;
        }

        return r;
    }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

            }
        }
    }

    public HandleSet getHandleSet(final int keylength, final int space) throws RowSpaceExceededException {
        return new HandleSet(keylength, this, space);
    }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        this.asc = ascending;
        this.zero = null;
    }
   
    public HandleSet getHandleSet(final int keylength, final int space) throws RowSpaceExceededException {
        return new HandleSet(keylength, this, space);
    }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        final ReferenceContainer<ReferenceType> countRam = this.ram.get(termHash, null);
        assert countRam == null || countRam.size() >= 0;
        int c = countRam == null ? countFile : countFile + countRam.size();
        // exclude entries from delayed remove
        synchronized (this.removeDelayedURLs) {
            final HandleSet s = this.removeDelayedURLs.get(termHash);
            if (s != null) c -= s.size();
            if (c < 0) c = 0;
        }
        // put count result into cache
        if (MemoryControl.shortStatus()) this.countCache.clear();
        this.countCache.insert(termHash, c);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.