Examples of DecideRule


Examples of org.archive.modules.deciderules.DecideRule

        // TODO: Fix filters so work on CrawlURI.
        CrawlURI curi = (caUri instanceof CrawlURI)?
            (CrawlURI)caUri:
            new CrawlURI(caUri.getUURI());
        boolean result = false;
        DecideRule seq = getSupplementaryRule();
        if (seq.decisionFor(curi) == DecideResult.ACCEPT) {
            result = true;
            if (LOGGER.isLoggable(Level.FINER)) {
                LOGGER.finer("Accepted: " + caUri);
            }
        } else {
View Full Code Here

Examples of org.archive.modules.deciderules.DecideRule

        boolean includeSuccesses = !params.isNull("includeSuccesses");
        boolean includeFailures = !params.isNull("includeFailures");
        boolean includeScheduleds = !params.isNull("includeScheduleds");
        boolean scopeIncludes = !params.isNull("scopeIncludes");
       
        DecideRule scope = (scopeIncludes) ? frontier.getScope() : null;
        FrontierJournal newJournal = frontier.getFrontierJournal();
       
        BufferedReader br = ArchiveUtils.getBufferedReader(source);
        String read;
        int lines = 0;
        try {
            while ((read = br.readLine())!=null) {
                lines++;
                if(read.length()<4) {
                    continue;
                }
                String lineType = read.substring(0, 3);
                if(includeSuccesses && F_SUCCESS.equals(lineType)
                        || includeFailures && F_FAILURE.equals(lineType)
                        || includeScheduleds && F_ADD.equals(lineType)) {
                    try {
                        CrawlURI caUri = CrawlURI.fromHopsViaString(read.substring(3));
                        if(scope!=null) {
                            //TODO:SPRINGY
///                            caUri.setStateProvider(controller.getSheetManager());
                            // skip out-of-scope URIs if so configured
                            if(!scope.accepts(caUri)) {
                                continue;
                            }
                        }
                        frontier.considerIncluded(caUri);
                        if (newJournal != null) {
View Full Code Here

Examples of org.archive.modules.deciderules.DecideRule

        boolean scheduleFailures = !params.isNull("scheduleFailures");
        boolean scheduleScheduleds = !params.isNull("scheduleScheduleds");
        boolean scopeScheduleds = !params.isNull("scopeScheduleds");
        boolean forceRevisit = !params.isNull("forceRevisit");
       
        DecideRule scope = (scopeScheduleds) ? frontier.getScope() : null;
       
        try {
            // Scan log for all 'F+' lines: if not alreadyIncluded, schedule for
            // visitation
            br = ArchiveUtils.getBufferedReader(source);
            try {
                while ((read = br.readLine())!=null) {
                    qLines++;
                    if(read.length()<4) {
                        continue;
                    }
                    String lineType = read.substring(0, 3);
                    if(scheduleSuccesses && F_SUCCESS.equals(lineType)
                            || scheduleFailures && F_FAILURE.equals(lineType)
                            || scheduleScheduleds && F_ADD.equals(lineType)) {
                       
                        try {
                            CrawlURI caUri = CrawlURI.fromHopsViaString(read.substring(3));
                           
                            //TODO:SPRINGY
//                            caUri.setStateProvider(controller.getSheetManager());
                            if(scope!=null) {
                                // skip out-of-scope URIs if so configured
                                if(!scope.accepts(caUri)) {
                                    continue;
                                }
                            }
                           
                            caUri.setForceFetch(forceRevisit);
View Full Code Here

Examples of org.archive.modules.deciderules.DecideRule

     * @throws IOException
     */
    public long importRecoverFormat(File source, boolean applyScope,
            boolean includeOnly, boolean forceFetch, String acceptTags)
    throws IOException {
        DecideRule scope = (applyScope) ? getScope() : null;
        FrontierJournal newJournal = getFrontierJournal();
        Matcher m = Pattern.compile(acceptTags).matcher("");
        BufferedReader br = ArchiveUtils.getBufferedReader(source);
        String read;
        int lineCount = 0;
        try {
            while ((read = br.readLine())!=null) {
                lineCount++;
                if(read.length()<4) {
                    continue;
                }
                String lineType = read.substring(0, 3);
                m.reset(lineType);
                if(m.matches()) {
                    try {
                        String uriHopsViaString = read.substring(3).trim();
                        CrawlURI curi = CrawlURI.fromHopsViaString(uriHopsViaString);
                        if(scope!=null) {
                            sheetOverlaysManager.applyOverlaysTo(curi);
                            try {
                                KeyedProperties.loadOverridesFrom(curi);
                                if(!scope.accepts(curi)) {
                                    // skip out-of-scope URIs if so configured
                                    continue;
                                }
                            } finally {
                                KeyedProperties.clearOverridesFrom(curi);
View Full Code Here

Examples of org.archive.modules.deciderules.DecideRule

        BufferedReader br = null;
        String path = params.optString("path");
        boolean forceRevisit = !params.isNull("forceRevisit");
        boolean asSeeds = !params.isNull("asSeeds");
        boolean scopeScheduleds = !params.isNull("scopeScheduleds");
        DecideRule scope = scopeScheduleds ? getScope() : null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
            Iterator<String> iter = new RegexLineIterator(new LineReadingIterator(br),
                RegexLineIterator.COMMENT_LINE, extractor, output);
            while(iter.hasNext()) {
                try {
                   
                    CrawlURI curi = CrawlURI.fromHopsViaString(((String)iter.next()));
                    curi.setForceFetch(forceRevisit);
                    if (asSeeds) {
                        curi.setSeed(asSeeds);
                        if (curi.getVia() == null || curi.getVia().length() <= 0) {
                            // Danger of double-add of seeds because of this code here.
                            // Only call addSeed if no via.  If a via, the schedule will
                            // take care of updating scope.
                            getSeeds().addSeed(curi);
                        }
                    }
                    if(scope!=null) {
                        //TODO:SPRINGY
//                        curi.setStateProvider(controller.getSheetManager());
                        if(!scope.accepts(curi)) {
                            continue;
                        }
                    }
                       
                    this.controller.getFrontier().schedule(curi);
View Full Code Here

Examples of org.archive.modules.deciderules.DecideRule

        }
        return ProcessResult.PROCEED;
    }
   
    protected boolean decideToMapOutlink(CrawlURI cauri) {
        DecideRule rule = getOutlinkRule();
        boolean rejected = rule.decisionFor(cauri)
                .equals(DecideResult.REJECT);
        return !rejected;
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.