Package com.flaptor.util.parser

Examples of com.flaptor.util.parser.ParseOutput


        if (null != destElement) {
            logger.warn("Parsed element '"+fieldName+"' already present in document. Will not overwrite.");
            return;
        }

        ParseOutput out = parser.parse("",bodyElement.getText().getBytes("UTF-8"),"UTF-8");


        for (String field: extraFields) {
            String content = out.getField(field);
            if (null == content) {
                logger.debug("had document without " + field + " field. Continuing with other fields.");
                continue;
            }
            Element docField = DocumentHelper.createElement("field");
            docField.addText(content);
            docField.addAttribute("name",field);
            docField.addAttribute("indexed", Boolean.toString(INDEXED));
            docField.addAttribute("stored", Boolean.toString(STORED));
            docField.addAttribute("tokenized", "true");
            bodyElement.getParent().add(docField);
        }


        String text = out.getText();
        Element field = DocumentHelper.createElement("field");
        field.addText(text);
        field.addAttribute("name", fieldName);
        field.addAttribute("indexed", Boolean.toString(INDEXED));
        field.addAttribute("stored", Boolean.toString(STORED));
View Full Code Here


    private void parse() {
        IParser parser = getParser();
        if (null != parser) {
            try {
                String encoding = getEncoding();
                ParseOutput out = parser.parse(page.getUrl(), content, encoding);
                this.text = out.getText();
                this.title = out.getTitle();
                List<Pair<String,String>> ol = out.getLinks();
                links = new Link[ol.size()];
                int i = 0;
                for (Pair<String,String> lnk : ol) {
                    links[i++] = new Link(lnk.first(), lnk.last());
                }
View Full Code Here

TOP

Related Classes of com.flaptor.util.parser.ParseOutput

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.