Examples of org.apache.any23.configuration.Configuration

org.apache.any23.extractor.ExtractionContext
Defines the main Any23 configuration.

96 97 98 99 100 101 102 103 104 105 106 107 108     public static Communicator getCommunicator() throws Exception {         if (communicator == null) {             Configuration.loadConfiguration();             //get lodead configuration             Configuration config = Configuration.getCurrentConfiguration();             //initializing communicator based on the configuration             initializeCommunicator(config.getClusterConfiguration());             int rmiPort = Configuration.getCurrentConfiguration().getCurrentNodeConfiguration().getRmiRepositoryPort();             Communicator.getCommunicator().initializeCommunicatorServer(rmiPort);         }         return communicator;     }

View Full Code Here

84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122     }     public static void main(String[] args)     {         final DownloadReport report = new DownloadReport();         Configuration conf = new Configuration();         if (parseArgs(args, conf))         {             try             {                 /* Parse the configuration xml file */                 conf.loadFile((String) conf.get("config_file"));                 /* Load the plugins */                 PluginLoader loader = new PluginLoader((String)(conf.get(Configuration.PLUGIN_FOLDER_TAG)));                 loader.load();                 /* Get the subreddits */                 List<String> subreddits = (List<String>)conf.get(Configuration.SUBREDDITS_ALLOWED_TAG);                 /* Get number of download threads and create the thread pool */                 String downloadThreads = (String)conf.get(Configuration.PARALLEL_DOWNLOAD_TAG);                 int numberOfThreads = downloadThreads == null ? 10 : Integer.valueOf(downloadThreads);                 ExecutorService threadPool = Executors.newFixedThreadPool(numberOfThreads);                 /* Connect to reddit and get the content from the source given */                 System.out.println("Connecting to reddit");                 RedditRequest reddit = new RedditRequest((String) conf.get("reddit_token"));                 System.out.println("Fetching results");                 String sourceUrl = (String) conf.get("source_url");                 String countStr = (String) conf.get("count");                 int count = countStr == null ? 25 : Integer.valueOf(countStr);                 int numberOfBatchs = count / 100 + 1;                 String lastFetched = null;                 for (int i = 0; i < numberOfBatchs; ++i)

View Full Code Here

91 92 93 94 95 96 97 98     private ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();     public WebResponder(Servlet any23servlet, HttpServletResponse response) {         this.any23servlet = any23servlet;         this.response = response;         this.runner = new Any23();         runner.setHTTPUserAgent("Any23-Servlet");     }

View Full Code Here

107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171             String format,             boolean report, boolean annotate     ) throws IOException {         if (in == null) return;         if (!initRdfWriter(format, report, annotate)) return;         final ExtractionReport er;         try {             er = runner.extract(eps, in, rdfWriter);             rdfWriter.close();             if (! er.hasMatchingExtractors() ) {                 sendError(                         415,                         "No suitable extractor found for this media type",                         null,                         er,                         report                 );                 return;             }         } catch (IOException ioe) {             // IO Error.             if (ioe.getCause() != null && ValidatorException.class.equals(ioe.getCause().getClass())) {                 final String errMsg = "Could not fetch input, IO Error.";                 any23servlet.log(errMsg, ioe.getCause());                 sendError(502, errMsg, ioe, null, report);                 return;             }             any23servlet.log("Could not fetch input", ioe);             sendError(502, "Could not fetch input.", ioe, null, report);             return;         } catch (ExtractionException e) {             // Extraction error.             any23servlet.log("Could not parse input", e);             sendError(502, "Could not parse input.", e, null, report);             return;         } catch (Exception e) {             any23servlet.log("Internal error", e);             sendError(500, "Internal error.", e, null, report);             return;         }         /* *** No triples found. 
*** */         any23servlet.log("Extraction complete, " + reporter.getTotalTriples() + " triples");         if (reporter.getTotalTriples() == 0) {             sendError(                     501,                     "Extraction completed. No triples have been found.",                     null,                     er, report             );             return;         }         // Regular response.         response.setContentType(outputMediaType);         response.setStatus(200);         // Set the output encoding equals to the input one.         final String charsetEncoding = er.getEncoding();         if (Charset.isSupported(charsetEncoding)) {             response.setCharacterEncoding(er.getEncoding());         } else {             response.setCharacterEncoding("UTF-8");         }         final ServletOutputStream sos = response.getOutputStream();

View Full Code Here

94 95 96 97 98 99 100 101 102 103 104 105 106 107      */     @Test     public void testDetectCLIPlugins() throws IOException {         final Iterator<Tool> tools = manager.getApplicableTools(CRAWLER_TARGET_DIR, CRAWLER_DEPENDENCY_DIR);         final Set<String> toolClasses = new HashSet<String>();         Tool tool;         while(tools.hasNext()) {             tool = tools.next();             assertTrue("Found duplicate tool.", toolClasses.add(tool.getClass().getName()));         }         assertTrue(                 String.format(                         "Expected [%s] plugin be detected, but not found int the built classpath",                         Crawler.class.getName()

View Full Code Here

152 153 154 155 156 157 158 159 160 161 162                                                      );         }         reportingTripleHandler = new ReportingTripleHandler(tripleHandler);         final Configuration configuration = DefaultConfiguration.singleton();         extractionParameters =                 pedantic                         ?                 new ExtractionParameters(configuration, ValidationMode.ValidateAndFix, nestingDisabled)                         :

View Full Code Here

150 151 152 153 154 155 156 157 158 159 160                                                      );         }         reportingTripleHandler = new ReportingTripleHandler(tripleHandler);         final Configuration configuration = DefaultConfiguration.singleton();         extractionParameters =                 pedantic                         ?                 new ExtractionParameters(configuration, ValidationMode.ValidateAndFix, nestingDisabled)                         :

View Full Code Here

236 237 238 239 240 241 242 243 244 245 246 247         final CompositeTripleHandler cth = new CompositeTripleHandler();         cth.addChild(rdfxmlWriter);         cth.addChild(repositoryWriter);         final ModifiableConfiguration configuration = DefaultConfiguration.copy();         configuration.setProperty("any23.extraction.metadata.domain.per.entity", "on");         SingleDocumentExtraction instance = new SingleDocumentExtraction(                 configuration,                 new HTMLFixture(copyResourceToTempFile(file)).getOpener("http://nested.test.com"),                 extractorGroup,                 cth

View Full Code Here

517 518 519 520 521 522 523 524 525 526 527 528         logger.debug(n3);     }     @Test     public void testModifiableConfiguration_issue183() throws Exception {         final ModifiableConfiguration modifiableConf = DefaultConfiguration.copy();         modifiableConf.setProperty("any23.extraction.metadata.timesize", "off");         final Any23 any23 = new Any23(modifiableConf);         final String content = FileUtils.readResourceContent("/rdf/rdf-issue183.ttl");         final DocumentSource source = new StringDocumentSource(content, "http://base.com");         final ByteArrayOutputStream out = new ByteArrayOutputStream();

View Full Code Here

69 70 71 72 73 74 75 76 77 78 79     public void testTypedLiteralIncompatibleValueSupport()     throws IOException, ExtractionException, TripleHandlerException {         final URI uri = RDFUtils.uri("http://host.com/test-malformed-literal.turtle");         ByteArrayOutputStream baos = new ByteArrayOutputStream();         final TripleHandler th = new RDFXMLWriter(baos);         final ExtractionContext extractionContext = new ExtractionContext("turtle-extractor", uri);         final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, th);         extractor.setStopAtFirstError(false);         try {             extractor.run(                     ExtractionParameters.newDefault(),

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.any23.configuration.Configuration

org.apache.any23.AbstractAny23TestBase

org.apache.any23.Any23

org.apache.any23.Any23Test

org.apache.any23.cli.ExtractorDocumentation

org.apache.any23.cli.MicrodataParser

org.apache.any23.cli.MicrodataParser$MicrodataParserDocumentSourceConverter

org.apache.any23.cli.MimeDetector

org.apache.any23.cli.MimeDetector$MimeDetectorDocumentSourceConverter

org.apache.any23.cli.Rover

org.apache.any23.cli.Tool

Copyright © 2018 www.massapi.com. All rights reserved.

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.