Examples of TeeContentHandler


Examples of org.apache.tika.sax.TeeContentHandler

            ContentHandler ch, Metadata md, String name, String attribute) {
        Matcher matcher =
            META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

Examples of org.apache.tika.sax.TeeContentHandler

            StringWriter textBuffer = new StringWriter();
            StringWriter textMainBuffer = new StringWriter();
            StringWriter xmlBuffer = new StringWriter();
            StringBuilder metadataBuffer = new StringBuilder();

            ContentHandler handler = new TeeContentHandler(
                    getHtmlHandler(htmlBuffer),
                    getTextContentHandler(textBuffer),
                    getTextMainContentHandler(textMainBuffer),
                    getXmlContentHandler(xmlBuffer));
           
View Full Code Here

Examples of org.apache.tika.sax.TeeContentHandler

        Matcher matcher = new CompositeMatcher(
                DC_XPATH.parse("//dc:" + element),
                DC_XPATH.parse("//dc:" + element + "//text()"));
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

Examples of org.apache.tika.sax.TeeContentHandler

        Matcher matcher = new CompositeMatcher(
                META_XPATH.parse("//meta:" + element),
                META_XPATH.parse("//meta:" + element + "//text()"));
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, property), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

Examples of org.apache.tika.sax.TeeContentHandler

                META_XPATH.parse("//meta:user-defined//text()"));
        // e.g. <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
        ContentHandler branch = new MatchingContentHandler(
              new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
              matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

Examples of org.apache.tika.sax.TeeContentHandler

            ContentHandler ch, Metadata md, String name, String attribute) {
        Matcher matcher =
            META_XPATH.parse("//meta:document-statistic/@meta:"+attribute);
        ContentHandler branch = new MatchingContentHandler(
              new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

Examples of org.apache.tika.sax.TeeContentHandler

          ContentHandler ch, Metadata md, Property property, String attribute) {
      Matcher matcher =
          META_XPATH.parse("//meta:document-statistic/@meta:"+attribute);
      ContentHandler branch = new MatchingContentHandler(
            new AttributeMetadataHandler(META_NS, attribute, md, property), matcher);
      return new TeeContentHandler(ch, branch);
  }
View Full Code Here

Examples of org.apache.tika.sax.TeeContentHandler

  }

    protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context) {
        // We can no longer extend DcXMLParser due to the handling of dc:subject and dc:date
        // Process the Dublin Core Attributes
        ch = new TeeContentHandler(super.getContentHandler(ch, md, context),
                getDublinCoreHandler(md, TikaCoreProperties.TITLE, "title"),
                getDublinCoreHandler(md, TikaCoreProperties.CREATOR, "creator"),
                getDublinCoreHandler(md, TikaCoreProperties.DESCRIPTION, "description"),
                getDublinCoreHandler(md, TikaCoreProperties.PUBLISHER, "publisher"),
                getDublinCoreHandler(md, TikaCoreProperties.CONTRIBUTOR, "contributor"),
                getDublinCoreHandler(md, TikaCoreProperties.TYPE, "type"),
                getDublinCoreHandler(md, TikaCoreProperties.FORMAT, "format"),
                getDublinCoreHandler(md, TikaCoreProperties.IDENTIFIER, "identifier"),
                getDublinCoreHandler(md, TikaCoreProperties.LANGUAGE, "language"),
                getDublinCoreHandler(md, TikaCoreProperties.RIGHTS, "rights"));
       
        // Process the OO Meta Attributes
        ch = getMeta(ch, md, TikaCoreProperties.CREATED, "creation-date");
        // ODF uses dc:date for modified
        ch = new TeeContentHandler(ch, new ElementMetadataHandler(
                DublinCore.NAMESPACE_URI_DC, "date",
                md, TikaCoreProperties.MODIFIED));
       
        // ODF uses dc:subject for description
        ch = new TeeContentHandler(ch, new ElementMetadataHandler(
                DublinCore.NAMESPACE_URI_DC, "subject",
                md, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT));
        ch = getMeta(ch, md, TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, "keyword");
       
        ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), "editing-duration");       
View Full Code Here

Examples of org.apache.tika.sax.TeeContentHandler

                        }
                    }
                }
            };
            new HtmlParser().parse(
                    stream, new TeeContentHandler(body, link),
                    metadata, new ParseContext());
        } finally {
            stream.close();
        }
View Full Code Here

Examples of org.apache.tika.sax.TeeContentHandler

                metadata, property);
    }

    protected ContentHandler getContentHandler(
            ContentHandler handler, Metadata metadata, ParseContext context) {
        return new TeeContentHandler(
                super.getContentHandler(handler, metadata, context),
                getDublinCoreHandler(metadata, TikaCoreProperties.TITLE, "title"),
                getDublinCoreHandler(metadata, TikaCoreProperties.KEYWORDS, "subject"),
                getDublinCoreHandler(metadata, TikaCoreProperties.CREATOR, "creator"),
                getDublinCoreHandler(metadata, TikaCoreProperties.DESCRIPTION, "description"),
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.