Package org.apache.mahout.utils.email

Examples of org.apache.mahout.utils.email.MailProcessor


  private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromMailArchives.class);

  public void createSequenceFiles(MailOptions options) throws IOException {
    ChunkedWriter writer = new ChunkedWriter(new Configuration(), options.getChunkSize(), new Path(options.getOutputDir()));
    MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
    try {
      if (options.getInput().isDirectory()) {
        PrefixAdditionFilter filter = new PrefixAdditionFilter(processor, writer);
        options.getInput().listFiles(filter);
        log.info("Parsed {} messages from {}", filter.getMessageCount(), options.getInput().getAbsolutePath());
      } else {
        long start = System.currentTimeMillis();
        long cnt = processor.parseMboxLineByLine(options.getInput());
        long finish = System.currentTimeMillis();
        log.info("Parsed {} messages from {} in time: {}",
                 new Object[] { cnt, options.getInput().getAbsolutePath(), (finish - start) });
      }
    } finally {
View Full Code Here


    @Override
    public boolean accept(File current) {
      if (current.isDirectory()) {
        log.info("At {}", current.getAbsolutePath());
        PrefixAdditionFilter nested = new PrefixAdditionFilter(new MailProcessor(
            processor.getOptions(), processor.getPrefix() + File.separator + current.getName(), writer), writer);
        current.listFiles(nested);
        long dirCount = nested.getMessageCount();
        log.info("Parsed {} messages from directory {}", dirCount, current.getAbsolutePath());
        messageCount += dirCount;
View Full Code Here

  private static final int MAX_JOB_SPLIT_LOCATIONS = 1000000;

  public void createSequenceFiles(MailOptions options) throws IOException {
    ChunkedWriter writer = new ChunkedWriter(getConf(), options.getChunkSize(), new Path(options.getOutputDir()));
    MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
    try {
      if (options.getInput().isDirectory()) {
        PrefixAdditionFilter filter = new PrefixAdditionFilter(processor, writer);
        options.getInput().listFiles(filter);
        log.info("Parsed {} messages from {}", filter.getMessageCount(), options.getInput().getAbsolutePath());
      } else {
        long start = System.currentTimeMillis();
        long cnt = processor.parseMboxLineByLine(options.getInput());
        long finish = System.currentTimeMillis();
        log.info("Parsed {} messages from {} in time: {}", cnt, options.getInput().getAbsolutePath(), finish - start);
      }
    } finally {
      Closeables.close(writer, false);
View Full Code Here

    @Override
    public boolean accept(File current) {
      if (current.isDirectory()) {
        log.info("At {}", current.getAbsolutePath());
        PrefixAdditionFilter nested = new PrefixAdditionFilter(
          new MailProcessor(processor.getOptions(), processor.getPrefix()
            + File.separator + current.getName(), writer), writer);
        current.listFiles(nested);
        long dirCount = nested.getMessageCount();
        log.info("Parsed {} messages from directory {}", dirCount, current.getAbsolutePath());
        messageCount += dirCount;
View Full Code Here

  private static final int MAX_JOB_SPLIT_LOCATIONS = 1000000;

  public void createSequenceFiles(MailOptions options) throws IOException {
    ChunkedWriter writer = new ChunkedWriter(getConf(), options.getChunkSize(), new Path(options.getOutputDir()));
    MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
    try {
      if (options.getInput().isDirectory()) {
        PrefixAdditionDirectoryWalker walker = new PrefixAdditionDirectoryWalker(processor, writer);
        walker.walk(options.getInput());
        log.info("Parsed {} messages from {}", walker.getMessageCount(), options.getInput().getAbsolutePath());
      } else {
        long start = System.currentTimeMillis();
        long cnt = processor.parseMboxLineByLine(options.getInput());
        long finish = System.currentTimeMillis();
        log.info("Parsed {} messages from {} in time: {}", cnt, options.getInput().getAbsolutePath(), finish - start);
      }
    } finally {
      Closeables.close(writer, false);
View Full Code Here

    @Override
    protected void handleDirectoryStart(File current, int depth, Collection<Object> results) throws IOException {
      if (depth > 0) {
        log.info("At {}", current.getAbsolutePath());
        MailProcessor processor = processors.getFirst();
        MailProcessor subDirProcessor = new MailProcessor(processor.getOptions(), processor.getPrefix()
            + File.separator + current.getName(), writer);
        processors.push(subDirProcessor);
        messageCounts.push(0L);
      }
    }
View Full Code Here

      return files;
    }

    @Override
    protected void handleFile(File current, int depth, Collection<Object> results) throws IOException {
      MailProcessor processor = processors.getFirst();
      long currentDirMessageCount = messageCounts.pop();
      try {
        currentDirMessageCount += processor.parseMboxLineByLine(current);
      } catch (IOException e) {
        throw new IllegalStateException("Error processing " + current, e);
      }
      messageCounts.push(currentDirMessageCount);
    }
View Full Code Here

      SequenceFilesFromMailArchives.class);

  public void createSequenceFiles(MailOptions options) throws IOException {
    ChunkedWriter writer = new ChunkedWriter(
        getConf(), options.getChunkSize(), new Path(options.getOutputDir()));
    MailProcessor processor = new MailProcessor(
        options, options.getPrefix(), writer);
    try {
      if (options.getInput().isDirectory()) {
        PrefixAdditionFilter filter = new PrefixAdditionFilter(
            processor, writer);
        options.getInput().listFiles(filter);
        log.info("Parsed {} messages from {}", filter.getMessageCount(),
            options.getInput().getAbsolutePath());
      } else {
        long start = System.currentTimeMillis();
        long cnt = processor.parseMboxLineByLine(options.getInput());
        long finish = System.currentTimeMillis();
        log.info("Parsed {} messages from {} in time: {}", new Object[] {
            cnt, options.getInput().getAbsolutePath(), finish - start});
      }
    } finally {
View Full Code Here

    @Override
    public boolean accept(File current) {
      if (current.isDirectory()) {
        log.info("At {}", current.getAbsolutePath());
        PrefixAdditionFilter nested = new PrefixAdditionFilter(
            new MailProcessor(processor.getOptions(), processor.getPrefix()
                + File.separator + current.getName(), writer), writer);
        current.listFiles(nested);
        long dirCount = nested.getMessageCount();
        log.info("Parsed {} messages from directory {}", dirCount,
            current.getAbsolutePath());
View Full Code Here

TOP

Related Classes of org.apache.mahout.utils.email.MailProcessor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.