Package net.yacy.kelondro.index

Examples of net.yacy.kelondro.index.HandleMap


        Random r = new Random(0);
        long start = System.currentTimeMillis();

        System.gc(); // for resource measurement
        long a = MemoryControl.available();
        HandleMap idx = new HandleMap(12, Base64Order.enhancedCoder, 4, 150000, "test");
        for (int i = 0; i < count; i++) {
            try {
                idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count)));
            } catch (RowSpaceExceededException e) {
                Log.logException(e);
                break;
            }
        }
        long timek = ((long) count) * 1000L / (System.currentTimeMillis() - start);
        System.out.println("Result HandleMap: " + timek + " inc per second");
        System.gc();
        long memk = a - MemoryControl.available();
        System.out.println("Used Memory: " + memk + " bytes");
        System.out.println("x " + idx.get(FlatWordPartitionScheme.positionToHash(0)));
        idx.close();
       
        r = new Random(0);
        start = System.currentTimeMillis();
        byte[] hash;
        Integer d;
View Full Code Here


        }
       
        // there is an index and a gap file:
        // read the index file:
        try {
            this.index = new HandleMap(this.keylength, this.ordering, 8, this.fingerprintFileIdx);
        } catch (IOException e) {
            Log.logException(e);
            return false;
        } catch (RowSpaceExceededException e) {
            Log.logException(e);
View Full Code Here

                            final ReferenceFactory<ReferenceType> factory,
                            final ByteOrder termOrder,
                            final Row payloadrow) throws IOException, RowSpaceExceededException {
      
        System.out.println("CELL REFERENCE COLLECTION startup");
        HandleMap references = new HandleMap(payloadrow.primaryKeyLength, termOrder, 4, 1000000, heapLocation.getAbsolutePath());
        String[] files = heapLocation.list();
        for (String f: files) {
            if (f.length() < 22 || !f.startsWith("text.index") || !f.endsWith(".blob")) continue;
            File fl = new File(heapLocation, f);
            System.out.println("CELL REFERENCE COLLECTION opening blob " + fl);
            CloneableIterator<ReferenceContainer<ReferenceType>>  ei = new ReferenceIterator<ReferenceType>(fl, factory);
       
            ReferenceContainer<ReferenceType> container;
            final long start = System.currentTimeMillis();
            long lastlog = start - 27000;
            int count = 0;
            ReferenceType reference;
            byte[] mh;
            while (ei.hasNext()) {
                container = ei.next();
                if (container == null) continue;
                Iterator<ReferenceType> refi = container.entries();
                while (refi.hasNext()) {
                  reference = refi.next();
                  if (reference == null) continue;
                  mh = reference.urlhash();
                  if (mh == null) continue;
                    references.inc(mh);
                }
                count++;
                // write a log
                if (System.currentTimeMillis() - lastlog > 30000) {
                    System.out.println("CELL REFERENCE COLLECTION scanned " + count + " RWI index entries. ");
View Full Code Here

                // for the index
                Log.logSevere("TABLE", tablefile.getName() + ": not enough RAM (" + (MemoryControl.available() / 1024 / 1024) + "MB) left for index, deleting allocated table space to enable index space allocation (needed: " + (neededRAM4index / 1024 / 1024) + "MB)");
                this.table = null; System.gc();
                Log.logSevere("TABLE", tablefile.getName() + ": RAM after releasing the table: " + (MemoryControl.available() / 1024 / 1024) + "MB");
            }
            this.index = new HandleMap(rowdef.primaryKeyLength, rowdef.objectOrder, 4, records, tablefile.getAbsolutePath());
            final HandleMap errors = new HandleMap(rowdef.primaryKeyLength, NaturalOrder.naturalOrder, 4, records, tablefile.getAbsolutePath() + ".errors");
            Log.logInfo("TABLE", tablefile + ": TABLE " + tablefile.toString() + " has table copy " + ((this.table == null) ? "DISABLED" : "ENABLED"));

            // read all elements from the file into the copy table
            Log.logInfo("TABLE", "initializing RAM index for TABLE " + tablefile.getName() + ", please wait.");
            int i = 0;
            byte[] key;
            if (this.table == null) {
                final Iterator<byte[]> ki = new ChunkIterator(tablefile, rowdef.objectsize, rowdef.primaryKeyLength);
                while (ki.hasNext()) {
                    key = ki.next();
                    // write the key into the index table
                    assert key != null;
                    if (key == null) {i++; continue;}
                    if (rowdef.objectOrder.wellformed(key)) {
                        this.index.putUnique(key, i++);
                    } else {
                        errors.putUnique(key, i++);
                    }
                }
            } else {
                byte[] record;
                key = new byte[rowdef.primaryKeyLength];
                final Iterator<byte[]> ri = new ChunkIterator(tablefile, rowdef.objectsize, rowdef.objectsize);
                while (ri.hasNext()) {
                    record = ri.next();
                    assert record != null;
                    if (record == null) {i++; continue;}
                    System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength);

                    // write the key into the index table
                    if (rowdef.objectOrder.wellformed(key)) {
                        this.index.putUnique(key, i++);
                        // write the tail into the table
                        try {
                            this.table.addUnique(this.taildef.newEntry(record, rowdef.primaryKeyLength, true));
                        } catch (final RowSpaceExceededException e) {
                            this.table = null;
                            break;
                        }
                        if (abandonTable()) {
                            this.table = null;
                            break;
                        }
                    } else {
                        errors.putUnique(key, i++);
                    }
                }
            }

            // open the file
            this.file = new BufferedRecords(new Records(tablefile, rowdef.objectsize), this.buffersize);
            assert this.file.size() == this.index.size() : "file.size() = " + this.file.size() + ", index.size() = " + this.index.size() + ", file = " + filename();

            // clean up the file by cleaning badly formed entries
            final int errorc = errors.size();
            int errorcc = 0;
            int idx;
            for (final Entry entry: errors) {
                key = entry.getPrimaryKeyBytes();
                idx = (int) entry.getColLong(1);
                Log.logWarning("Table", "removing not well-formed entry " + idx + " with key: " + NaturalOrder.arrayList(key, 0, key.length) + ", " + errorcc++ + "/" + errorc);
                removeInFile(idx);
            }
            errors.close();
            assert this.file.size() == this.index.size() : "file.size() = " + this.file.size() + ", index.size() = " + this.index.size() + ", file = " + filename();

            // remove doubles
            if (!freshFile) {
                final ArrayList<long[]> doubles = this.index.removeDoubles();
View Full Code Here

     */
    public HeapWriter(final File temporaryHeapFile, final File readyHeapFile, final int keylength, final ByteOrder ordering, int outBuffer) throws IOException {
        this.heapFileTMP = temporaryHeapFile;
        this.heapFileREADY = readyHeapFile;
        this.keylength = keylength;
        this.index = new HandleMap(keylength, ordering, 8, 100000, readyHeapFile.getAbsolutePath());
        try {
            this.os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(temporaryHeapFile), outBuffer));
        } catch (OutOfMemoryError e) {
            // try this again without buffer
            this.os = new DataOutputStream(new FileOutputStream(temporaryHeapFile));
View Full Code Here

        System.out.println("finished");
    }

    public static void incell(final File cellPath, final String statisticPath) {
        try {
          final HandleMap idx = ReferenceContainerArray.referenceHashes(
                cellPath,
                Segment.wordReferenceFactory,
                Base64Order.enhancedCoder,
                WordReferenceRow.urlEntryRow);
            System.out.println("INDEX REFERENCE COLLECTION starting dump of statistics");
            idx.dump(new File(statisticPath));
            System.out.println("INDEX REFERENCE COLLECTION finished dump, wrote " + idx.size() + " entries to " + statisticPath);
            idx.close();
        } catch (final Exception e) {
            Log.logException(e);
        }
    }
View Full Code Here

        }
    }

    public static int diffurlcol(final String metadataPath, final String statisticFile, final String diffFile) throws IOException, RowSpaceExceededException {
        System.out.println("INDEX DIFF URL-COL startup");
        final HandleMap idx = new HandleMap(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 4, new File(statisticFile));
        final MetadataRepository mr = new MetadataRepository(new File(metadataPath), "text.urlmd", false, false);
        final HandleSet hs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 1000000);
        System.out.println("INDEX DIFF URL-COL loaded dump, starting diff");
        final long start = System.currentTimeMillis();
        long update = start - 7000;
        int count = 0;
        for (final byte[] refhash: mr) {
            if (idx.get(refhash) == -1) {
                // the key exists as urlhash in the URL database, but not in the collection as referenced urlhash
                hs.put(refhash);
            }
            count++;
            if (System.currentTimeMillis() - update > 10000) {
                System.out.println("INDEX DIFF URL-COL running, checked " + count + ", found " + hs.size() + " missing references so far, " + (((System.currentTimeMillis() - start) * (mr.size() - count) / count) / 60000) + " minutes remaining");
                update = System.currentTimeMillis();
            }
        }
        idx.close();
        mr.close();
        System.out.println("INDEX DIFF URL-COL finished diff, starting dump to " + diffFile);
        count = hs.dump(new File(diffFile));
        System.out.println("INDEX DIFF URL-COL finished dump, wrote " + count + " references that occur in the URL-DB, but not in the collection-dump");
        return count;
View Full Code Here

        }
       
        // there is an index and a gap file:
        // read the index file:
        try {
            this.index = new HandleMap(this.keylength, this.ordering, 8, this.fingerprintFileIdx);
        } catch (IOException e) {
            Log.logException(e);
            return false;
        } catch (RowSpaceExceededException e) {
            Log.logException(e);
View Full Code Here

     */
    public HeapWriter(final File temporaryHeapFile, final File readyHeapFile, final int keylength, final ByteOrder ordering, int outBuffer) throws IOException {
        this.heapFileTMP = temporaryHeapFile;
        this.heapFileREADY = readyHeapFile;
        this.keylength = keylength;
        this.index = new HandleMap(keylength, ordering, 8, 100000, readyHeapFile.getAbsolutePath());
        try {
            this.os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(temporaryHeapFile), outBuffer));
        } catch (OutOfMemoryError e) {
            // try this again without buffer
            this.os = new DataOutputStream(new FileOutputStream(temporaryHeapFile));
View Full Code Here

                            final ReferenceFactory<ReferenceType> factory,
                            final ByteOrder termOrder,
                            final Row payloadrow) throws IOException, RowSpaceExceededException {

        System.out.println("CELL REFERENCE COLLECTION startup");
        final HandleMap references = new HandleMap(payloadrow.primaryKeyLength, termOrder, 4, 1000000, heapLocation.getAbsolutePath());
        final String[] files = heapLocation.list();
        for (final String f: files) {
            if (f.length() < 22 || !f.startsWith("text.index") || !f.endsWith(".blob")) continue;
            final File fl = new File(heapLocation, f);
            System.out.println("CELL REFERENCE COLLECTION opening blob " + fl);
            final CloneableIterator<ReferenceContainer<ReferenceType>>  ei = new ReferenceIterator<ReferenceType>(fl, factory);

            ReferenceContainer<ReferenceType> container;
            final long start = System.currentTimeMillis();
            long lastlog = start - 27000;
            int count = 0;
            ReferenceType reference;
            byte[] mh;
            while (ei.hasNext()) {
                container = ei.next();
                if (container == null) continue;
                final Iterator<ReferenceType> refi = container.entries();
                while (refi.hasNext()) {
                  reference = refi.next();
                  if (reference == null) continue;
                  mh = reference.urlhash();
                  if (mh == null) continue;
                    references.inc(mh);
                }
                count++;
                // write a log
                if (System.currentTimeMillis() - lastlog > 30000) {
                    System.out.println("CELL REFERENCE COLLECTION scanned " + count + " RWI index entries. ");
View Full Code Here

TOP

Related Classes of net.yacy.kelondro.index.HandleMap

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.