Package cz.mallat.uasparser

Source Code of cz.mallat.uasparser.UASparser

package cz.mallat.uasparser;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import jregex.Matcher;
import jregex.Pattern;
import cz.mallat.uasparser.fileparser.Entry;
import cz.mallat.uasparser.fileparser.PHPFileParser;
import cz.mallat.uasparser.fileparser.Section;

/**
* User agent parser.
*
* Thread-safe, however also see the {@link MultithreadedUASparser} for a faster variant.
*
* @author oli
*
*/
public class UASparser {

    static final String INFO_URL = "http://user-agent-string.info";
    static final String ROBOT = "Robot";
    static final Long DEVICE_ID_OTHER = 1L;
    static final Long DEVICE_ID_DESKTOP = 2L;
    static final Long DEVICE_ID_SMARTPHONE = 3L;

    protected Map<String, RobotEntry> robotsMap;
    protected Map<Long, OsEntry> osMap;
    protected Map<Long, BrowserEntry> browserMap;
    protected Map<Long, String> browserTypeMap;
    protected Map<String, Long> browserRegMap;
    protected Map<Long, Long> browserOsMap;
    protected Map<String, Long> osRegMap;
    protected Map<Long, DeviceEntry> deviceMap;
    protected Map<String, Long> deviceRegMap;

    protected Map<Pattern, Long> compiledBrowserRegMap;
    protected Map<Pattern, Long> compiledOsRegMap;
    protected Map<Pattern, Long> compiledDeviceRegMap;

    protected UserAgentInfo unknownAgentInfo;

    /**
     * Create a new {@link UASparser} without initializing maps. Expects an updater to be
     * configured and run immediately.
     */
    public UASparser() {
    }

    /**
     * Use the given filename to load the definition file from the local filesystem
     *
     * @param localDefinitionFilename
     * @throws IOException
     */
    public UASparser(String localDefinitionFilename) throws IOException {
        loadDataFromFile(new File(localDefinitionFilename));
        unknownAgentInfo = new UserAgentInfo();
    }

    /**
     * Use the given inputstream to load the definition file from the local filesystem
     *
     * @param inputStreamToDefinitionFile
     * @throws IOException
     */
    public UASparser(InputStream inputStreamToDefinitionFile) throws IOException {
        loadDataFromFile(inputStreamToDefinitionFile);
        unknownAgentInfo = new UserAgentInfo();
    }

    /**
     * When a class inherits from this class, it probably has to override this method
     */
    @Deprecated
    protected void checkDataMaps() throws IOException {
        // empty for this base class
    }

    /**
     * Parse the given user agent string and returns a UserAgentInfo object with the related data
     *
     * @param useragent
     * @throws IOException
     *             may happen when the retrieval of the data file fails
     * @return
     */
    public UserAgentInfo parse(String useragent) throws IOException {
        if (useragent == null) {
            return unknownAgentInfo;
        }

        UserAgentInfo uaInfo = new UserAgentInfo();
        useragent = useragent.trim();

        // check that the data maps are up-to-date (deprecated)
        checkDataMaps();

        // first check if it's a robot
        if (processRobot(useragent, uaInfo)) {
            return uaInfo;
        }

        // it's not a robot, so search for a browser on the browser regex patterns
        processBrowserRegex(useragent, uaInfo);
        if (!uaInfo.hasOsInfo()) {
            // search the OS regex patterns for the used OS
            processOsRegex(useragent, uaInfo);
        }

        // search the device regex patterns to set the according device
        processDeviceRegex(useragent, uaInfo);
        if (!uaInfo.hasDeviceInfo()) {
            guessDeviceType(uaInfo);
        }

        return uaInfo;
    }

    /**
     * Determine device type based on UA type field
     * @param uaInfo
     */
    protected void guessDeviceType(UserAgentInfo uaInfo) {
        if (compiledDeviceRegMap == null || deviceMap == null) {
            return;
        }

        String type = uaInfo.getType();
        if (type == null || type.isEmpty()) {
            return;
        }

        if (type.equals("Other") || type.equals("Library") || type.equals("Useragent Anonymizer")) {
            uaInfo.setDeviceEntry(deviceMap.get(DEVICE_ID_OTHER));
        } else if (type.equals("Mobile Browser") || type.equals("Wap Browser")) {
            uaInfo.setDeviceEntry(deviceMap.get(DEVICE_ID_SMARTPHONE));
        } else {
            uaInfo.setDeviceEntry(deviceMap.get(DEVICE_ID_DESKTOP));
        }
    }

    /**
     * Parse the given user agent string and returns a UserAgentInfo object
     * with only the related Browser data set.
     *
     * @param useragent
     * @return {@link UserAgentInfo}
     */
    public UserAgentInfo parseBrowserOnly(String useragent) {
        if (useragent == null) {
            return unknownAgentInfo;
        }

        UserAgentInfo uaInfo = new UserAgentInfo();
        processBrowserRegex(useragent, uaInfo);
        return uaInfo;
    }

    /**
     * Precompile all regular regexes
     */
    protected void preCompileRegExes() {
        preCompileBrowserRegMap();
        preCompileOsRegMap();
        preCompileDeviceRegMap();
    }

    /**
     * Precompile browser regexes
     */
    protected void preCompileBrowserRegMap() {
        LinkedHashMap<Pattern, Long> compiledBrowserRegMap = new LinkedHashMap<Pattern, Long>(browserRegMap.size());
        for (Map.Entry<String, Long> entry : browserRegMap.entrySet()) {
            Pattern pattern = new Pattern(entry.getKey(), Pattern.IGNORE_CASE | Pattern.DOTALL);
            compiledBrowserRegMap.put(pattern, entry.getValue());
        }
        this.compiledBrowserRegMap = compiledBrowserRegMap;
    }

    /**
     * Precompile OS regexes
     */
    protected void preCompileOsRegMap() {
        LinkedHashMap<Pattern, Long> compiledOsRegMap = new LinkedHashMap<Pattern, Long>(osRegMap.size());
        for (Map.Entry<String, Long> entry : osRegMap.entrySet()) {
            Pattern pattern = new Pattern(entry.getKey(), Pattern.IGNORE_CASE | Pattern.DOTALL);
            compiledOsRegMap.put(pattern, entry.getValue());
        }
        this.compiledOsRegMap = compiledOsRegMap;
    }

    /**
     * Precompile device regexes
     */
    protected void preCompileDeviceRegMap() {
        if (deviceRegMap != null) {
        LinkedHashMap<Pattern, Long> compiledDeviceRegMap = new LinkedHashMap<Pattern, Long>(deviceRegMap.size());
          for (Map.Entry<String, Long> entry : deviceRegMap.entrySet()) {
              Pattern pattern = new Pattern(entry.getKey(), Pattern.IGNORE_CASE | Pattern.DOTALL);
              compiledDeviceRegMap.put(pattern, entry.getValue());
          }
          this.compiledDeviceRegMap = compiledDeviceRegMap;
        }
    }

    /**
     * Checks if the User Agent matches that of a known Robot (crawler or other automated agent)
     *
     * @param useragent
     * @param uaInfo
     */
    protected boolean processRobot(String useragent, UserAgentInfo uaInfo) {
        // Robots UAs must match *exactly*, hence we use a simple hash lookup and not a regex match
        if (!robotsMap.containsKey(useragent)) {
            return false;
        }

        uaInfo.setType(ROBOT);
        RobotEntry robotEntry = robotsMap.get(useragent);
        uaInfo.setRobotEntry(robotEntry);
        if (robotEntry.getOsId() != null) {
            uaInfo.setOsEntry(osMap.get(robotEntry.getOsId()));
        }

        if (compiledDeviceRegMap != null && deviceMap != null) {
            // Set device to 'other'
            uaInfo.setDeviceEntry(deviceMap.get(DEVICE_ID_OTHER));
        }
        return true;
    }

    /**
     * Searchs in the browser regex table. if found a match copies the browser data and if possible os data
     *
     * @param useragent
     * @param uaInfo
     */
    protected void processBrowserRegex(String useragent, UserAgentInfo uaInfo) {
        for (Map.Entry<Pattern, Long> entry : compiledBrowserRegMap.entrySet()) {
            Matcher matcher = entry.getKey().matcher(useragent);
            if (matcher.find()) {
                Long idBrowser = entry.getValue();
                BrowserEntry be = browserMap.get(idBrowser);
                if (be != null) {
                    uaInfo.setType(browserTypeMap.get(be.getType()));;
                    if (matcher.groupCount() > 1) {
                        uaInfo.setBrowserVersionInfo(matcher.group(1));
                    }
                    uaInfo.setBrowserEntry(be);
                }
                // check if this browser has exactly one OS mapped
                Long idOs = browserOsMap.get(idBrowser);
                if (idOs != null) {
                    uaInfo.setOsEntry(osMap.get(idOs));
                }
                return;
            }
        }
    }

    /**
     * Searches in the os regex table. if found a match copies the os data
     *
     * @param useragent
     * @param uaInfo
     */
    protected void processOsRegex(String useragent, UserAgentInfo uaInfo) {
        for (Map.Entry<Pattern, Long> entry : compiledOsRegMap.entrySet()) {
            Matcher matcher = entry.getKey().matcher(useragent);
            if (matcher.find()) {
                uaInfo.setOsEntry(osMap.get(entry.getValue()));
                return;
            }
        }
    }

    /**
     * Searches in the devices regex table. if found a match copies the device data
     *
     * @param useragent
     * @param uaInfo
     */
    protected void processDeviceRegex(String useragent, UserAgentInfo uaInfo) {
        if (compiledDeviceRegMap != null && deviceMap != null) {
        for (Map.Entry<Pattern, Long> entry : compiledDeviceRegMap.entrySet()) {
              Matcher matcher = entry.getKey().matcher(useragent);
              if (matcher.find()) {
                  uaInfo.setDeviceEntry(deviceMap.get(entry.getValue()));
                  return;
              }
          }
        }
    }

    /**
     * loads the data file and creates all internal data structures
     *
     * @param definitionFile
     * @throws IOException
     */
    protected void loadDataFromFile(File definitionFile) throws IOException {
        PHPFileParser fp = new PHPFileParser(definitionFile);
        createInternalDataStructure(fp.getSections());
    }

    /**
     * loads the data file and creates all internal data structs
     *
     * @param is
     * @throws IOException
     */
    protected void loadDataFromFile(InputStream is) throws IOException {
        PHPFileParser fp = new PHPFileParser(is);
        createInternalDataStructure(fp.getSections());
    }

    /**
     * Creates the internal data structures from the sectionList
     *
     * @param sectionList
     */
    protected void createInternalDataStructure(List<Section> sectionList) {
        for (Section sec : sectionList) {
            if ("robots".equals(sec.getName())) {
                Map<String, RobotEntry> robotsMapTmp = new HashMap<String, RobotEntry>();
                for (Entry en : sec.getEntries()) {
                    RobotEntry re = new RobotEntry(en.getData());
                    robotsMapTmp.put(re.getUserAgentString(), re);
                }
                robotsMap = robotsMapTmp;
            } else if ("os".equals(sec.getName())) {
                Map<Long, OsEntry> osMapTmp = new HashMap<Long, OsEntry>();
                for (Entry en : sec.getEntries()) {
                    OsEntry oe = new OsEntry(en.getData());
                    osMapTmp.put(Long.parseLong(en.getKey()), oe);
                }
                osMap = osMapTmp;
            } else if ("browser".equals(sec.getName())) {
                Map<Long, BrowserEntry> browserMapTmp = new HashMap<Long, BrowserEntry>();
                for (Entry en : sec.getEntries()) {
                    BrowserEntry be = new BrowserEntry(en.getData());
                    browserMapTmp.put(Long.parseLong(en.getKey()), be);
                }
                browserMap = browserMapTmp;
            } else if ("browser_type".equals(sec.getName())) {
                Map<Long, String> browserTypeMapTmp = new HashMap<Long, String>();
                for (Entry en : sec.getEntries()) {
                    browserTypeMapTmp.put(Long.parseLong(en.getKey()), en.getData().iterator().next());
                }
                browserTypeMap = browserTypeMapTmp;
            } else if ("browser_reg".equals(sec.getName())) {
                Map<String, Long> browserRegMapTmp = new LinkedHashMap<String, Long>();
                for (Entry en : sec.getEntries()) {
                    Iterator<String> it = en.getData().iterator();
                    browserRegMapTmp.put(convertPerlToJavaRegex(it.next()), Long.parseLong(it.next()));
                }
                browserRegMap = browserRegMapTmp;
            } else if ("browser_os".equals(sec.getName())) {
                Map<Long, Long> browserOsMapTmp = new HashMap<Long, Long>();
                for (Entry en : sec.getEntries()) {
                    browserOsMapTmp.put(Long.parseLong(en.getKey()), Long.parseLong(en.getData().iterator().next()));
                }
                browserOsMap = browserOsMapTmp;
            } else if ("os_reg".equals(sec.getName())) {
                Map<String, Long> osRegMapTmp = new LinkedHashMap<String, Long>();
                for (Entry en : sec.getEntries()) {
                    Iterator<String> it = en.getData().iterator();
                    osRegMapTmp.put(convertPerlToJavaRegex(it.next()), Long.parseLong(it.next()));
                }
                osRegMap = osRegMapTmp;
            } else if ("device".equals(sec.getName())) {
              Map<Long, DeviceEntry> deviceMapTmp = new HashMap<Long, DeviceEntry>();
                for (Entry en : sec.getEntries()) {
                  DeviceEntry de = new DeviceEntry(en.getData());
                  deviceMapTmp.put(Long.parseLong(en.getKey()), de);
                }
                deviceMap = deviceMapTmp;
            } else if ("device_reg".equals(sec.getName())) {
                Map<String, Long> deviceRegMapTmp = new LinkedHashMap<String, Long>();
                for (Entry en : sec.getEntries()) {
                    Iterator<String> it = en.getData().iterator();
                    deviceRegMapTmp.put(convertPerlToJavaRegex(it.next()), Long.parseLong(it.next()));
                }
                deviceRegMap = deviceRegMapTmp;
            }
        }
        preCompileRegExes();
    }

    /**
     * Converts a PERL style regex into the Java style. That means in removes the leading and the last / and removes the modifiers
     *
     * @param regex
     * @return
     */
    protected String convertPerlToJavaRegex(String regex) {
        regex = regex.substring(1);
        int lastIndex = regex.lastIndexOf('/');
        regex = regex.substring(0, lastIndex);
        return regex;
    }

}
TOP

Related Classes of cz.mallat.uasparser.UASparser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.