Examples of Detector


Examples of org.apache.tika.detect.Detector

     (an OOXML container with binary blobs), but we
     *  shouldn't break on these files either (TIKA-826)
     */
    @Test
    public void testExcelXLSB() throws Exception {
       Detector detector = new DefaultDetector();
       AutoDetectParser parser = new AutoDetectParser();
      
       InputStream input = ExcelParserTest.class.getResourceAsStream(
             "/test-documents/testEXCEL.xlsb");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
       } finally {
          input.close();
       }
      
View Full Code Here

Examples of org.apache.tika.detect.Detector

     * We don't currently support the old Excel 95 .xls file format,
     *  but we shouldn't break on these files either (TIKA-976)
     */
    @Test
    public void testExcel95() throws Exception {
       Detector detector = new DefaultDetector();
       AutoDetectParser parser = new AutoDetectParser();
      
       InputStream input = ExcelParserTest.class.getResourceAsStream(
             "/test-documents/testEXCEL_95.xls");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel", type.toString());
       } finally {
          input.close();
       }
      
View Full Code Here

Examples of org.apache.tika.detect.Detector

    }

    if (mediaType !=null) {
      metadata.add(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE, mediaType.toString());

      final Detector detector = parser.getDetector();

      parser.setDetector(new Detector() {
        public MediaType detect(InputStream inputStream, Metadata metadata) throws IOException {
          String ct = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);

          if (ct!=null) {
            return MediaType.parse(ct);
          } else {
            return detector.detect(inputStream, metadata);
          }
        }
      });
    }
  }
View Full Code Here

Examples of org.apache.tika.detect.Detector

    String[] segments = uriInfo.getRequestUri().toASCIIString()
        .split("/" + opkey + "/" + pathkey + "/");
    final String filename = segments[segments.length - 1];
    logger.info("resource :" + segments[segments.length - 1]);

    final Detector detector = createDetector(httpHeaders);
    final AutoDetectParser parser = new AutoDetectParser(detector);
    final ParseContext context = new ParseContext();
    context.set(Parser.class, parser);
    final org.apache.tika.metadata.Metadata metadata = new org.apache.tika.metadata.Metadata();
    setMetadataFromHeader(parser, metadata, httpHeaders);
View Full Code Here

Examples of org.apache.tika.detect.Detector

  @Produces({ MediaType.APPLICATION_JSON })
  @Path("/{opkey}")
  public StreamingOutput getMetadata(final InputStream is,
      @PathParam("opkey") final String opkey,
      @Context HttpHeaders httpHeaders) throws Exception {
    final Detector detector = createDetector(httpHeaders);
    final AutoDetectParser parser = new AutoDetectParser(detector);
    final ParseContext context = new ParseContext();
    context.set(Parser.class, parser);
    final org.apache.tika.metadata.Metadata metadata = new org.apache.tika.metadata.Metadata();
    setMetadataFromHeader(parser, metadata, httpHeaders);
View Full Code Here

Examples of org.apache.tika.detect.Detector

        && !mediaType
            .equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) {
      metadata.add(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE,
          mediaType.toString());

      final Detector detector = parser.getDetector();

      parser.setDetector(new Detector() {
        public org.apache.tika.mime.MediaType detect(
            InputStream inputStream,
            org.apache.tika.metadata.Metadata metadata)
            throws IOException {
          String ct = metadata
              .get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);
          logger.info("Content type " + ct);
          if (ct != null) {
            return org.apache.tika.mime.MediaType.parse(ct);
          } else {
            return detector.detect(inputStream, metadata);
          }
        }
      });
    }
  }
View Full Code Here

Examples of org.apache.tika.detect.Detector

    if (mediaType == null
        || mediaType
            .equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE))
      return (new TikaConfig()).getMimeRepository();
    else
      return new Detector() {

        public org.apache.tika.mime.MediaType detect(
            InputStream inputStream,
            org.apache.tika.metadata.Metadata metadata)
            throws IOException {
View Full Code Here

Examples of org.apache.tika.detect.Detector

     * We don't currently support the .xlsb file format
     *  (an OOXML container with binary blobs), but we
     *  shouldn't break on these files either (TIKA-826)
     */
    public void testExcelXLSB() throws Exception {
       Detector detector = new DefaultDetector();
       AutoDetectParser parser = new AutoDetectParser();
      
       InputStream input = ExcelParserTest.class.getResourceAsStream(
             "/test-documents/testEXCEL.xlsb");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
       } finally {
          input.close();
       }
      
View Full Code Here

Examples of org.apache.tika.detect.Detector

    }
   
    protected MediaType getMediaType(BufferedInputStream inputStream, String fileName) throws IOException {
        final TikaInputStream tikaInputStreamStream = TikaInputStream.get(new CloseShieldInputStream(inputStream));
        try {
            final Detector detector = new DefaultDetector();
            final Metadata metadata = new Metadata();
            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
           
            final MediaType type = detector.detect(tikaInputStreamStream, metadata);
            logger.debug("Determined '{}' for '{}'", type, fileName);
            return type;
        }
        catch (IOException e) {
            logger.warn("Failed to determine media type for '" + fileName + "' assuming XML", e);
View Full Code Here

Examples of org.exoplatform.services.chars.chardet.Detector

      FileInputStream input = new FileInputStream(file);
      BufferedInputStream buffer = new BufferedInputStream(input);
      byte[] data = new byte[buffer.available()];
      int available = -1;

      Detector det = new Detector(PSMDetector.ALL);

      //Set an observer...
      //The Notify() will be called when a matching charset is found.
      det.init(new ICharsetDetectionObserver()
      {
         // Observer callback: prints the charset name passed to it to standard output.
         public void notify(String charset)
         {
            System.out.println("CHARSET === " + charset);
         }
      });

      boolean done = false;
      boolean isAscii = true;

      while ((available = buffer.read(data)) > -1)
      {
         //Khoilv'code.
         //System.out.print(data);
         if (isAscii)
            isAscii = det.isAscii(data, available);

         //DoIt if non-ascii and not done yet.
         if (!isAscii && !done)
            done = det.doIt(data, available, false);
      }

      det.dataEnd();

      if (isAscii)
      {
         System.out.println("CHARSET = ASCII");
         found = true;
      }

      if (!found)
      {
         String prob[] = det.getProbableCharsets();
         for (int i = 0; i < prob.length; i++)
         {
            System.out.println("Probable Charset = " + prob[i]);
         }
      }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.