Package org.archive.crawler.datamodel

Examples of org.archive.crawler.datamodel.UriUniqFilter$CrawlUriReceiver


   
    public void instanceMain(String[] args) throws IOException {
        String testClass = args[0];
        String inputFilename = args[1];
        long start = System.currentTimeMillis();
        UriUniqFilter uniq = createUriUniqFilter(testClass);
        long created = System.currentTimeMillis();
        BufferedReader br = new BufferedReader(new FileReader(inputFilename));
        if(args.length>2) {
            String outputFilename = args[2];
            out = new BufferedWriter(new FileWriter(outputFilename));
        }
        int added = 0;
        while((current=br.readLine())!=null) {
            added++;
            uniq.add(current,null);
        }
        uniq.close();
        long finished = System.currentTimeMillis();
        if(out!=null) {
            out.close();
        }
        System.out.println(added+" adds");
        System.out.println(uniq.count()+" retained");
        System.out.println((created-start)+"ms to setup UUF");
        System.out.println((finished-created)+"ms to perform all adds");
    }
View Full Code Here


        System.out.println((created-start)+"ms to setup UUF");
        System.out.println((finished-created)+"ms to perform all adds");
    }
   
    private UriUniqFilter createUriUniqFilter(String testClass) throws IOException {
        UriUniqFilter uniq = null;
        if(BdbUriUniqFilter.class.getName().endsWith(testClass)) {;
            // BDB setup
            File tmpDir = File.createTempFile("uuf","benchmark");
            tmpDir.delete();
            tmpDir.mkdir();
            uniq = new BdbUriUniqFilter(tmpDir, 50);
        } else if(BloomUriUniqFilter.class.getName().endsWith(testClass)) {
            // bloom setup
            uniq = new BloomUriUniqFilter();
        } else if(MemUriUniqFilter.class.getName().endsWith(testClass)) {
            // mem hashset
            uniq = new MemUriUniqFilter();
        } else if (FPUriUniqFilter.class.getName().endsWith(testClass)) {
            // mem fp set (open-addressing) setup
            uniq = new FPUriUniqFilter(new MemLongFPSet(21,0.75f));
        }
        uniq.setDestination(this);
        return uniq;
    }
View Full Code Here

TOP

Related Classes of org.archive.crawler.datamodel.UriUniqFilter$CrawlUriReceiver

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.