Package org.webharvest.definition

Examples of org.webharvest.definition.ScraperConfiguration


        this.setProperty("Path", path);
       
        path = CommonUtil.adaptFilename(path);
        String fullPath = path;

        ScraperConfiguration configuration = scraper.getConfiguration();
        File originalFile = configuration.getSourceFile();
        String originalUrl = configuration.getUrl();
        if (originalFile != null) {
            String originalPath = CommonUtil.adaptFilename( originalFile.getAbsolutePath() );
            int index = originalPath.lastIndexOf('/');
            if (index > 0) {
                String workingPath = originalPath.substring(0, index);
                fullPath = CommonUtil.getAbsoluteFilename(workingPath, path);
            }
        } else if (originalUrl != null) {
            fullPath = CommonUtil.fullUrl(originalUrl, path);
            isUrl = true;
        }

        ScraperConfiguration includedConfig;
        try {
            includedConfig = isUrl ? new ScraperConfiguration(new URL(fullPath)) : new ScraperConfiguration(fullPath);
            scraper.execute(includedConfig.getOperations());
            return new EmptyVariable();
        } catch (FileNotFoundException e) {
            throw new FileException("Cannot include configuration file " + fullPath, e);
        } catch (MalformedURLException e) {
            throw new FileException("Cannot include configuration file " + fullPath, e);
View Full Code Here


                this.configDocument.load(source == null ? "" : source.toString());
            }
           
            refreshTree();
            InputSource in = new InputSource(new StringReader(xmlPane.getText()));
            ScraperConfiguration scraperConfiguration = new ScraperConfiguration(in);
            setScraperConfiguration(scraperConfiguration);
        } catch (IOException e) {
            GuiUtils.showErrorMessage( e.getMessage() );
        }
    }
View Full Code Here

        updateControls();

        String xmlContent = this.xmlPane.getText();
        InputSource in = new InputSource( new StringReader(xmlContent) );
        try {
            ScraperConfiguration scraperConfiguration = new ScraperConfiguration(in);
            scraperConfiguration.setSourceFile( this.configDocument.getFile() );
            scraperConfiguration.setUrl( this.configDocument.getUrl() );

            setScraperConfiguration(scraperConfiguration);

            ide.setTabIcon(this, null);
        } catch(Exception e) {
View Full Code Here

//      props.setProperty("log4j.appender.stdout.layout", "org.apache.log4j.PatternLayout");
//      props.setProperty("log4j.appender.stdout.layout.ConversionPattern", "%-5p (%20F:%-3L) - %m\n");
//        PropertyConfigurator.configure(props);

//        ScraperConfiguration config = new ScraperConfiguration("c:/temp/scrapertest/configs/test2.xml");
        ScraperConfiguration config = new ScraperConfiguration("c:/temp/scrapertest/dddd.xml");
//        ScraperConfiguration config = new ScraperConfiguration( new URL("http://localhost/scripts/test/sample8.xml") );
        Scraper scraper = new Scraper(config, "c:/temp/scrapertest/");

        scraper.setDebug(true);
View Full Code Here

                        System.out.println(e.getMessage());
                    }
                }
            }

            ScraperConfiguration config = null;
            String configLowercase = configFilePath.toLowerCase();
            if ( configLowercase.startsWith("http://") || configLowercase.startsWith("https://") ) {
                config = new ScraperConfiguration( new URL(configFilePath) );
            } else {
                config = new ScraperConfiguration(configFilePath);
            }

            Scraper scraper = new Scraper(config, workingDir);

            String isDebug = (String) params.get("debug");
View Full Code Here

    }
    public void executeConfig()
    {
        try
        {
            ScraperConfiguration config = new ScraperConfiguration(configFile);
            Scraper scraper = new Scraper(config,resFile);
            scraper.setDebug(true);
            scraper.execute();
        }
        catch (FileNotFoundException ex)
View Full Code Here

    private void scrape() throws CarbonException {
        //todo need to add proxy info
        InputStream in = new ByteArrayInputStream(config.toString().getBytes());
        InputSource inputSource = new InputSource(in);
        ScraperConfiguration scraperConfiguration = new ScraperConfiguration(inputSource);
        Scraper scraper = new Scraper(scraperConfiguration, "");
        // Execute the scraper config
        scraper.execute();
        scraperContext = scraper.getContext();
    }
View Full Code Here

        this.webHarvestConfigPath = this.getProperty("web_harvest_config");
    }

    public Scraper getScraperConfig() throws DataServiceFault {
        Scraper scraper;
        ScraperConfiguration scraperConfiguration;
        try {
            /* For the given file path of the web harvest configuration */
            if (!webHarvestConfigPath.trim().startsWith("<config>")) {
                scraperConfiguration = new ScraperConfiguration(webHarvestConfigPath);
            } else {
                /* If the Web harvest configuration has provided */
                InputStream in = new ByteArrayInputStream(webHarvestConfigPath.getBytes());
                InputSource inputSource = new InputSource(in);
                scraperConfiguration = new ScraperConfiguration(inputSource);
            }
            scraper = new Scraper(scraperConfiguration, "");
            return scraper;
        } catch (FileNotFoundException e) {
            throw new DataServiceFault(e, "Error in reading web harvest configuration");
View Full Code Here

TOP

Related Classes of org.webharvest.definition.ScraperConfiguration

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.