Examples of ParsedDatum


Examples of bixo.datum.ParsedDatum

            Callable<ParsedDatum> c = new TikaCallable(_parser, _contentExtractor, _linkExtractor, is, metadata, isExtractLanguage(), _parseContext);
            FutureTask<ParsedDatum> task = new FutureTask<ParsedDatum>(c);
            Thread t = new Thread(task);
            t.start();
           
            ParsedDatum result;
            try {
                result = task.get(getParserPolicy().getMaxParseDuration(), TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                task.cancel(true);
                t.interrupt();
                throw e;
            } finally {
                t = null;
            }
           
            // TODO KKr Should there be a BaseParser to take care of copying
            // these two fields?
            result.setHostAddress(fetchedDatum.getHostAddress());
            result.setPayload(fetchedDatum.getPayload());
            return result;
        } finally {
            IoUtils.safeClose(is);
        }
    }
View Full Code Here

Examples of bixo.datum.ParsedDatum

        super.prepare(process, opCall);
       
        _reader = new SAXReader(new Parser());
        _reader.setXMLFilter(new DowngradeXmlFilter(_removeNamespaces));
        _reader.setEncoding("UTF-8");
        _input = new ParsedDatum();
    }
View Full Code Here

Examples of bixo.datum.ParsedDatum

                _parseContext = makeParseContext();
            }
            _parser.parse(_input, teeContentHandler, _metadata, _parseContext);
           
            String lang = _extractLanguage ? detectLanguage(_metadata, profilingHandler) : "";
            return new ParsedDatum(_metadata.get(Metadata.RESOURCE_NAME_KEY), null, _contentExtractor.getContent(), lang,
                            _metadata.get(Metadata.TITLE),
                            _linkExtractor.getLinks(), makeMap(_metadata));
        } catch (Exception e) {
            // Generic exception that's OK to re-throw
            throw e;
View Full Code Here

Examples of bixo.datum.ParsedDatum

        @Override
        public void operate(FlowProcess flowProcess, FunctionCall<NullContext> functionCall) {
            FetchedDatum fetchedDatum = new FetchedDatum(functionCall.getArguments());
           
            try {
                ParsedDatum parseResult = _parser.parse(fetchedDatum);
                _flowProcess.increment(ParserCounters.DOCUMENTS_PARSED, 1);
                functionCall.getOutputCollector().add(BixoPlatform.clone(parseResult.getTuple(), flowProcess));
            } catch (Exception e) {
                LOGGER.warn("Error processing " + fetchedDatum.getUrl(), e);
                _flowProcess.increment(ParserCounters.DOCUMENTS_FAILED, 1);
                // TODO KKr - don't lose datums for documents that couldn't be parsed
            }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.