Examples of HandleSet


Examples of net.yacy.kelondro.index.HandleSet

        this.delayed = new TreeMap<Long, byte[]>();
        this.minimumLocalDelta = minimumLocalDelta;
        this.minimumGlobalDelta = minimumGlobalDelta;
        this.myAgentIDs = myAgentIDs;
        this.domStackInitSize = Integer.MAX_VALUE;
        this.ddc = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
        this.double_push_check = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);

        // create a stack for newly entered entries
        if (!(cachePath.exists())) cachePath.mkdir(); // make the path
        this.cacheStacksPath.mkdirs();
        final File f = new File(this.cacheStacksPath, stackname + indexSuffix);
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        // removes all entries with a specific profile hash.
        // this may last some time
        // returns number of deletions

        // first find a list of url hashes that shall be deleted
        final HandleSet urlHashes = Base64Order.enhancedCoder.getHandleSet(this.urlFileIndex.row().primaryKeyLength, 100);
        final long terminate = (timeout > 0) ? System.currentTimeMillis() + timeout : Long.MAX_VALUE;
        synchronized (this) {
            final Iterator<Row.Entry> i = this.urlFileIndex.rows();
            Row.Entry rowEntry;
            Request crawlEntry;
            while (i.hasNext() && (System.currentTimeMillis() < terminate)) {
                rowEntry = i.next();
                crawlEntry = new Request(rowEntry);
                if (crawlEntry.profileHandle().equals(profileHandle)) {
                    urlHashes.put(crawlEntry.url().hash());
                }
            }
        }

        // then delete all these urls from the queues and the file index
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

            }
        }

        // iterate through the domain stacks
        final Iterator<Map.Entry<String, HandleSet>> q = this.domainStacks.entrySet().iterator();
        HandleSet stack;
        while (q.hasNext()) {
            stack = q.next().getValue();
            for (final byte[] handle: urlHashes) stack.remove(handle);
            if (stack.isEmpty()) q.remove();
        }

        return removedCounter;
    }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

    }

    private void pushHashToDomainStacks(String host, final byte[] urlhash) throws RowSpaceExceededException {
        // extend domain stack
        if (host == null) host = localhost;
        HandleSet domainList = this.domainStacks.get(host);
        if (domainList == null) {
            // create new list
            domainList = new HandleSet(12, Base64Order.enhancedCoder, 1);
            domainList.put(urlhash);
            this.domainStacks.put(host, domainList);
        } else {
            // extend existent domain list
          domainList.put(urlhash);
        }
    }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

    }

    private void removeHashFromDomainStacks(String host, final byte[] urlhash) {
        // reduce domain stack
        if (host == null) host = localhost;
        final HandleSet domainList = this.domainStacks.get(host);
        if (domainList == null) {
            this.domainStacks.remove(host);
            return;
        }
        domainList.remove(urlhash);
        if (domainList.isEmpty()) this.domainStacks.remove(host);
    }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

    private void fillDomainStacks() throws IOException {
      if (!this.domainStacks.isEmpty() && System.currentTimeMillis() - this.lastDomainStackFill < 120000L) return;
      this.domainStacks.clear();
      this.top.clear();
      this.lastDomainStackFill = System.currentTimeMillis();
      final HandleSet handles = this.urlFileIndex.keysFromBuffer(objectIndexBufferSize / 2);
        final CloneableIterator<byte[]> i = handles.keys(true, null);
        byte[] handle;
        String host;
        Request request;
      while (i.hasNext()) {
          handle = i.next();
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

    public static int diffurlcol(final String metadataPath, final String statisticFile, final String diffFile) throws IOException, RowSpaceExceededException {
        System.out.println("INDEX DIFF URL-COL startup");
        final HandleMap idx = new HandleMap(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 4, new File(statisticFile));
        final MetadataRepository mr = new MetadataRepository(new File(metadataPath), "text.urlmd", false, false);
        final HandleSet hs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 1000000);
        System.out.println("INDEX DIFF URL-COL loaded dump, starting diff");
        final long start = System.currentTimeMillis();
        long update = start - 7000;
        int count = 0;
        for (final byte[] refhash: mr) {
            if (idx.get(refhash) == -1) {
                // the key exists as urlhash in the URL database, but not in the collection as referenced urlhash
                hs.put(refhash);
            }
            count++;
            if (System.currentTimeMillis() - update > 10000) {
                System.out.println("INDEX DIFF URL-COL running, checked " + count + ", found " + hs.size() + " missing references so far, " + (((System.currentTimeMillis() - start) * (mr.size() - count) / count) / 60000) + " minutes remaining");
                update = System.currentTimeMillis();
            }
        }
        idx.close();
        mr.close();
        System.out.println("INDEX DIFF URL-COL finished diff, starting dump to " + diffFile);
        count = hs.dump(new File(diffFile));
        System.out.println("INDEX DIFF URL-COL finished dump, wrote " + count + " references that occur in the URL-DB, but not in the collection-dump");
        return count;
    }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

    public static void export(final String metadataPath, final int format, final String export, final String diffFile) throws IOException, RowSpaceExceededException {
        // format: 0=text, 1=html, 2=rss/xml
        System.out.println("URL EXPORT startup");
        final MetadataRepository mr = new MetadataRepository(new File(metadataPath), "text.urlmd", false, false);
        final HandleSet hs = (diffFile == null) ? null : new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, new File(diffFile));
        System.out.println("URL EXPORT loaded dump, starting export");
        final Export e = mr.export(new File(export), ".*", hs, format, false);
        try {
            e.join();
        } catch (final InterruptedException e1) {
            Log.logException(e1);
        }
        System.out.println("URL EXPORT finished export, wrote " + ((hs == null) ? mr.size() : hs.size()) + " entries");
    }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

    public static void delete(final String metadataPath, final String diffFile) throws IOException, RowSpaceExceededException {
        System.out.println("URL DELETE startup");
        final MetadataRepository mr = new MetadataRepository(new File(metadataPath), "text.urlmd", false, false);
        final int mrSize = mr.size();
        final HandleSet hs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, new File(diffFile));
        System.out.println("URL DELETE loaded dump, starting deletion of " + hs.size() + " entries from " + mrSize);
        for (final byte[] refhash: hs) {
            mr.remove(refhash);
        }
        System.out.println("URL DELETE finished deletions, " + mr.size() + " entries left in URL database");
    }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

            final RankingProfile ranking,
            final String userAgent) {

      if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(UTF8.getBytes(queryString)))) {
            this.queryString = null;
            this.queryHashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
            this.excludeHashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
            try {
                this.queryHashes.put(UTF8.getBytes(queryString));
            } catch (final RowSpaceExceededException e) {
                Log.logException(e);
            }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.