Package org.apache.oodt.cas.pushpull.filerestrictions.parsers

Source Code of org.apache.oodt.cas.pushpull.filerestrictions.parsers.GenericEmailParser

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.oodt.cas.pushpull.filerestrictions.parsers;

//OODT imports
import org.apache.log4j.Logger;
import org.apache.oodt.cas.pushpull.filerestrictions.Parser;
import org.apache.oodt.cas.pushpull.filerestrictions.VirtualFile;
import org.apache.oodt.cas.pushpull.filerestrictions.VirtualFileStructure;
import org.apache.oodt.cas.pushpull.exceptions.ParserException;

//Google imports
import com.google.common.base.Strings;
import com.google.common.collect.Lists;

//JDK imports
import java.io.FileInputStream;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* A generic email parser which generates file paths to be downloaded by using a defined java
* Pattern. The pattern should specify pattern groups for file paths in the matching pattern.
* These groups will then be extracted and added to the file structure.
*
* @author bfoster@apache.org (Brian Foster)
*/
public class GenericEmailParser implements Parser {

  private static final Logger log = Logger.getLogger(GenericEmailParser.class);

  public static final String FILE_PATTERNS_PROPERTY_NAME =
      "org.apache.oodt.cas.pushpull.generic.email.parser.file.pattern";
  public static final String CHECK_FOR_PATTERN_PROPERTY_NAME =
      "org.apache.oodt.cas.pushpull.generic.email.parser.check.for.pattern";
  public static final String PATH_TO_ROOT_PROPERTY_NAME =
      "org.apache.oodt.cas.pushpull.generic.email.parser.path.to.root";

  private final String filePattern;
  private final String checkForPattern;
  private final String pathToRoot;

  public GenericEmailParser() {
    filePattern = loadFilePattern();
    checkForPattern = loadCheckForPattern();
    pathToRoot = loadPathToRoot();
  }
  public GenericEmailParser(String filePattern, String checkForPattern, String pathToRoot) {
    this.filePattern = filePattern;
    this.checkForPattern = checkForPattern;
    this.pathToRoot = Strings.nullToEmpty(pathToRoot);
  }

  @Override
  public VirtualFileStructure parse(FileInputStream emailFile) throws ParserException {
    log.info("GenericEmailParser is parsing email: " + emailFile);

    VirtualFile root = VirtualFile.createRootDir();

    String emailText = readEmail(emailFile);
    if (!isValidEmail(emailText)) {
      throw new ParserException("Failed to find check for pattern in email: " + checkForPattern);
    }
    List<String> filePaths = generateFilePaths(emailText);

    for (String filePath : filePaths) {
      new VirtualFile(root, pathToRoot + filePath, false);
    }

    return new VirtualFileStructure("/", root);
  }

  private String readEmail(FileInputStream emailFile) {
    StringBuilder emailText = new StringBuilder("");
    Scanner scanner = new Scanner(emailFile);
    while (scanner.hasNextLine()) {
      emailText.append(scanner.nextLine()).append("\n");
    }
    scanner.close();
    return emailText.toString();
  }

  private List<String> generateFilePaths(String emailText) throws ParserException {
    List<String> filePaths = Lists.newArrayList();
    Pattern pattern = Pattern.compile(filePattern);
    Matcher m = pattern.matcher(emailText);
    if (m.find()) {
      // Ignore index 0, as that is the matching string for pattern.
      for (int i = 1; i <= m.groupCount(); i++) {
        filePaths.add(m.group(i));         
      }
    }
    return filePaths;
  }

  private boolean isValidEmail(String emailText) {
    Pattern pattern = Pattern.compile(checkForPattern);
    Matcher m = pattern.matcher(emailText.replaceAll("\n", " "));
    return m.find();
  }
 
  private String loadFilePattern() {
    return System.getProperty(FILE_PATTERNS_PROPERTY_NAME);
  }
 
  private String loadCheckForPattern() {
    return System.getProperty(CHECK_FOR_PATTERN_PROPERTY_NAME);   
  }
 
  private String loadPathToRoot() {
    return Strings.nullToEmpty(System.getProperty(PATH_TO_ROOT_PROPERTY_NAME));
  }
}
TOP

Related Classes of org.apache.oodt.cas.pushpull.filerestrictions.parsers.GenericEmailParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.