Examples of TemporaryResources


Examples of org.apache.tika.io.TemporaryResources

    if (!ExternalParser.check(checkCmd))
      return;

    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);

    TemporaryResources tmp = new TemporaryResources();
    File output = null;
    try {
      TikaInputStream tikaStream = TikaInputStream.get(stream, tmp);
      File input = tikaStream.getFile();
      long size = tikaStream.getLength();

      if (size >= config.getMinFileSizeToOcr() && size <= config.getMaxFileSizeToOcr()) {

        output = tmp.createTemporaryFile();
        doOCR(input, output, config);

        // Tesseract appends .txt to output file name
        output = new File(output.getAbsolutePath() + ".txt");

        if (output.exists())
          extractOutput(new FileInputStream(output), xhtml);

      }

    } finally {
      tmp.dispose();
      if (output != null)
        output.delete();

    }
  }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml =
            new XHTMLContentHandler(handler, metadata);

        TemporaryResources tmp = new TemporaryResources();
        try {
            parse(TikaInputStream.get(stream, tmp),
                    xhtml, metadata, tmp);
        } finally {
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

        // Check if we have access to the document
        if (input == null) {
            return MediaType.OCTET_STREAM;
        }

        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tis = TikaInputStream.get(input, tmp);

            byte[] prefix = new byte[1024]; // enough for all known formats
            int length = tis.peek(prefix);

            MediaType type = detectArchiveFormat(prefix, length);
            if (PackageParser.isZipArchive(type)
                    && TikaInputStream.isTikaInputStream(input)) {
                return detectZipFormat(tis);
            } else if (!type.equals(MediaType.OCTET_STREAM)) {
                return type;
            } else {
                return detectCompressorFormat(prefix, length);
            }
        } finally {
            try {
                tmp.dispose();
            } catch (TikaException e) {
                // ignore
            }
        }
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.