Package org.jab.docsearch.spider

Examples of org.jab.docsearch.spider.LinkFinder


        // Build the path of the text file that holds this index's spider links:
        // a ".txt" file named after the index description, passed together with the
        // index directory to FileUtils.addFolder.
        // NOTE(review): presumably replaceAll swaps spaces for underscores in the
        // description and addFolder joins folder + file name -- confirm in Utils/FileUtils.
        String linksListName = FileUtils.addFolder(fEnv.getIndexDirectory(), Utils.replaceAll(" ", idx.getDescription(), "_") + ".txt");
        // Load the links recorded for that file name (from an earlier spider run, per the log text below).
        ArrayList<SpiderUrl> oldSpiderLinks = Utils.getSpiderLinks(linksListName);

        logger.debug("doSpiderUpdate() Previously found link num total=" + oldSpiderLinks.size());

        // Set up a LinkFinder for the update, handing it the index path, the link list
        // file name, the index depth, ds, the index itself and the previously found links.
        LinkFinder ulf = new LinkFinder(idx.getPath(), linksListName, idx.getDepth(), ds, idx, oldSpiderLinks);
        ulf.init();
        try {
            ulf.update();
        }
        catch (IOException ioe) {
            // The failure is logged and shown via ds.showMessage, but not rethrown:
            // execution continues below and the counters are still read.
            logger.fatal("doSpiderUpdate() failed with IOException", ioe);
            ds.showMessage(I18n.getString("error"), ioe.toString());
        }

        // Collect the counters exposed by the LinkFinder after the update attempt.
        int numNew = ulf.getNumNew();
        int numDeletes = ulf.getNumDeletes();
        int numMetaNoIdx = ulf.getNumMetaNoIdx();
        int numChanges = ulf.getNumUpdates();
        int numUnChanged = ulf.getNumUnchanged();
        int numFails = ulf.getNumFails();

        // Assemble the summary text: one "<count> <label>" entry per category,
        // each followed by a blank line. Only the first three categories are appended
        // in the visible part of this excerpt.
        StringBuilder resultsMessage = new StringBuilder();
        resultsMessage.append(numNew).append(' ').append(I18n.getString("new_files")).append("\n\n");
        resultsMessage.append(numDeletes).append(' ').append(DocSearch.dsNumDelFiles).append("\n\n");
        resultsMessage.append(numChanges).append(' ').append(DocSearch.dsNumchangedFiles).append("\n\n");
View Full Code Here


        // Create the index folder. File.mkdir() creates a single directory level only,
        // and its boolean result is ignored here, so a failure goes unnoticed at this point.
        File idxpthfi = new File(idxFoldr);
        idxpthfi.mkdir();
        // Describe the new index for the spidered URL; spiderUrl is passed twice and
        // today's date and the archive directory are included.
        // NOTE(review): the meaning of the boolean and 0 arguments is defined by the
        // DocSearcherIndex constructor, which is not visible here.
        DocSearcherIndex dsi = new DocSearcherIndex(spiderUrl, desc, true, maxDocsToGet, idxFoldr, true, outFile, spiderUrl, DateTimeUtils.getToday(), 0, fEnv.getArchiveDirectory());
        // Open an index writer on the folder using a StandardAnalyzer.
        // NOTE(review): the trailing 'true' is presumably the "create new index" flag -- confirm
        // against the IndexWriter version in use.
        IndexWriter writer = new IndexWriter(idxFoldr, new StandardAnalyzer(), true);
        // writer.setUseCompoundFile(true);
        // Run the LinkFinder with the writer: initialise it, then call getAllLinks().
        LinkFinder lf = new LinkFinder(spiderUrl, outFile, maxDocsToGet, this, dsi, writer);
        lf.init();
        lf.getAllLinks();
        // NOTE(review): close() is not in a finally block, so an exception from init() or
        // getAllLinks() leaves the writer open.
        writer.close(); // close the writer
        // Add the new index to the 'indexes' collection.
        indexes.add(dsi);
    }
View Full Code Here

TOP

Related Classes of org.jab.docsearch.spider.LinkFinder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.