Package cz.mallat.uasparser

Source Code of cz.mallat.uasparser.UASparser

package cz.mallat.uasparser;

import cz.mallat.uasparser.fileparser.Entry;
import cz.mallat.uasparser.fileparser.PHPFileParser;
import cz.mallat.uasparser.fileparser.Section;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* User agent parser.
*
* @author oli
*
*/
public class UASparser {

  static final String INFO_URL = "http://user-agent-string.info";

  private Map<String, RobotEntry> robotsMap;
  private Map<Long, OsEntry> osMap;
  private Map<Long, BrowserEntry> browserMap;
  private Map<Long, String> browserTypeMap;
  private Map<String, Long> browserRegMap;
  private Map<Long, Long> browserOsMap;
  private Map<String, Long> osRegMap;

  /**
   * Use the given filename to load the definition file from the local filesystem
   *
   * @param localDefinitionFilename
   * @throws IOException
   */
  public UASparser(String localDefinitionFilename) throws IOException {
    loadDataFromFile(new File(localDefinitionFilename));
  }

  /**
   * Use the given inputstream to load the definition file from the local filesystem
   *
   * @param inputStreamToDefinitionFile
   * @throws IOException
   */
  public UASparser(InputStream inputStreamToDefinitionFile) throws IOException {
    loadDataFromFile(inputStreamToDefinitionFile);
  }

  /**
   * Constructor for inherented classes
   */
  protected UASparser() {
    // empty
  }

  /**
   * When a class inherents from this class, it probably has to override this method
   */
  protected void checkDataMaps() throws IOException {
    // empty for this base class
  }

  /**
   * Parse the given user agent string and returns a UserAgentInfo object with the related data
   *
   * @param useragent
   * @throws IOException
   *             may happen when the retrieval of the data file fails
   * @return
   */
  public UserAgentInfo parse(String useragent) throws IOException {
    UserAgentInfo retObj = new UserAgentInfo();

    if (useragent == null) {
      return retObj;
    }
    useragent = useragent.trim();

    // check that the data maps are up-to-date
    checkDataMaps();

    // first check if it's a robot
    if (!processRobot(useragent, retObj)) {
      // search for a browser on the browser regex patterns
      boolean osFound = processBrowserRegex(useragent, retObj);

      if (!osFound) {
        // search the OS regex patterns for the used OS
        processOsRegex(useragent, retObj);
      }
    }
    return retObj;
  }

  /**
   * Searches in the os regex table. if found a match copies the os data
   *
   * @param useragent
   * @param retObj
   */
  private void processOsRegex(String useragent, UserAgentInfo retObj) {
    for (Map.Entry<String, Long> entry : osRegMap.entrySet()) {
      Pattern pattern = Pattern.compile(entry.getKey(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
      Matcher matcher = pattern.matcher(useragent);
      if (matcher.find()) {
        // simply copy the OS data into the result object
        Long idOs = entry.getValue();
        OsEntry os = osMap.get(idOs);
        if (os != null) {
          os.copyTo(retObj);
        }
        break;
      }
    }
  }

  /**
   * Searchs in the browser regex table. if found a match copies the browser data and if possible os data
   *
   * @param useragent
   * @param retObj
   * @return
   */
  private boolean processBrowserRegex(String useragent, UserAgentInfo retObj) {
    boolean osFound = false;
    for (Map.Entry<String, Long> entry : browserRegMap.entrySet()) {
      Pattern pattern = Pattern.compile(entry.getKey(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
      Matcher matcher = pattern.matcher(useragent);
      if (matcher.find()) {
        // if a browse was found...
        Long idBrowser = entry.getValue();
        // ... but the browser type from browser type map into the typ
        copyType(retObj, idBrowser);
        // get all the browser data from the browser map
        BrowserEntry be = browserMap.get(idBrowser);
        if (be != null) {
          // first try to get the browser version from the first subgroup of the regex
          String browserVersionInfo = null;
          if (matcher.groupCount() > 0) {
            browserVersionInfo = matcher.group(1);
          }
          // copy the browser data into the result
          be.copyTo(retObj, browserVersionInfo);
        }
        // check if this browser has exactly one OS mapped
        Long idOs = browserOsMap.get(idBrowser);
        if (idOs != null) {
          osFound = true;
          OsEntry os = osMap.get(idOs);
          if (os != null) {
            os.copyTo(retObj);
          }
        }
        break;
      }
    }
    return osFound;
  }

  /**
   * Sets the source type, if possible
   *
   * @param retObj
   * @param idBrowser
   */
  private void copyType(UserAgentInfo retObj, Long idBrowser) {
    BrowserEntry be = browserMap.get(idBrowser);
    if (be != null) {
      Long type = be.getType();
      if (type != null) {
        String typeString = browserTypeMap.get(type);
        if (typeString != null) {
          retObj.setTyp(typeString);
        }
      }
    }
  }

  /**
   * Checks if the useragent comes from a robot. if yes copies all the data to the result object
   *
   * @param useragent
   * @param retObj
   * @return true if the useragent belongs to a robot, else false
   */
  private boolean processRobot(String useragent, UserAgentInfo retObj) {
    if (robotsMap.containsKey(useragent)) {
      retObj.setTyp("Robot");
      RobotEntry robotEntry = robotsMap.get(useragent);
      robotEntry.copyTo(retObj);
      if (robotEntry.getOsId() != null) {
        OsEntry os = osMap.get(robotEntry.getOsId());
        if (os != null) {
          os.copyTo(retObj);
        }
      }
      return true;
    }
    return false;
  }

  /**
   * loads the data file and creates all internal data structs
   *
   * @param definitionFile
   * @throws IOException
   */
  protected void loadDataFromFile(File definitionFile) throws IOException {
    PHPFileParser fp = new PHPFileParser(definitionFile);
    createInternalDataStructre(fp.getSections());
  }

  /**
   * loads the data file and creates all internal data structs
   *
   * @param is
   * @throws IOException
   */
  protected void loadDataFromFile(InputStream is) throws IOException {
    PHPFileParser fp = new PHPFileParser(is);
    createInternalDataStructre(fp.getSections());
  }

  /**
   * Creates the internal data structes from the seciontList
   *
   * @param sectionList
   */
  protected void createInternalDataStructre(List<Section> sectionList) {
    for (Section sec : sectionList) {
      if ("robots".equals(sec.getName())) {
        Map<String, RobotEntry> robotsMapTmp = new HashMap<String, RobotEntry>();
        for (Entry en : sec.getEntries()) {
          RobotEntry re = new RobotEntry(en.getData());
          robotsMapTmp.put(re.getUserAgentString(), re);
        }
        robotsMap = robotsMapTmp;
      } else if ("os".equals(sec.getName())) {
        Map<Long, OsEntry> osMapTmp = new HashMap<Long, OsEntry>();
        for (Entry en : sec.getEntries()) {
          OsEntry oe = new OsEntry(en.getData());
          osMapTmp.put(Long.parseLong(en.getKey()), oe);
        }
        osMap = osMapTmp;
      } else if ("browser".equals(sec.getName())) {
        Map<Long, BrowserEntry> browserMapTmp = new HashMap<Long, BrowserEntry>();
        for (Entry en : sec.getEntries()) {
          BrowserEntry be = new BrowserEntry(en.getData());
          browserMapTmp.put(Long.parseLong(en.getKey()), be);
        }
        browserMap = browserMapTmp;
      } else if ("browser_type".equals(sec.getName())) {
        Map<Long, String> browserTypeMapTmp = new HashMap<Long, String>();
        for (Entry en : sec.getEntries()) {
          browserTypeMapTmp.put(Long.parseLong(en.getKey()), en.getData().iterator().next());
        }
        browserTypeMap = browserTypeMapTmp;
      } else if ("browser_reg".equals(sec.getName())) {
        Map<String, Long> browserRegMapTmp = new LinkedHashMap<String, Long>();
        for (Entry en : sec.getEntries()) {
          Iterator<String> it = en.getData().iterator();
          browserRegMapTmp.put(convertPerlToJavaRegex(it.next()), Long.parseLong(it.next()));
        }
        browserRegMap = browserRegMapTmp;
      } else if ("browser_os".equals(sec.getName())) {
        Map<Long, Long> browserOsMapTmp = new HashMap<Long, Long>();
        for (Entry en : sec.getEntries()) {
          browserOsMapTmp.put(Long.parseLong(en.getKey()), Long.parseLong(en.getData().iterator().next()));
        }
        browserOsMap = browserOsMapTmp;
      } else if ("os_reg".equals(sec.getName())) {
        Map<String, Long> osRegMapTmp = new LinkedHashMap<String, Long>();
        for (Entry en : sec.getEntries()) {
          Iterator<String> it = en.getData().iterator();
          osRegMapTmp.put(convertPerlToJavaRegex(it.next()), Long.parseLong(it.next()));
        }
        osRegMap = osRegMapTmp;
      }
    }
  }

  /**
   * Converts a PERL style regex into the Java style. That means in removes the leading and the last / and removes the modifiers
   *
   * @param regex
   * @return
   */
  private String convertPerlToJavaRegex(String regex) {
    regex = regex.substring(1);
    int lastIndex = regex.lastIndexOf('/');
    regex = regex.substring(0, lastIndex);
    return regex;
  }

}
TOP

Related Classes of cz.mallat.uasparser.UASparser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.