Package org.commoncrawl.server

Source Code of org.commoncrawl.server.CommonCrawlServer

package org.commoncrawl.server;

import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.NetworkInterface;
import java.net.SocketException;
import java.net.UnknownHostException;
import java.util.Enumeration;
import java.util.TreeMap;
import java.util.Vector;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.util.StringUtils;
import org.commoncrawl.async.EventLoop;
import org.commoncrawl.rpc.RPCActorService;
import org.commoncrawl.util.shared.CCStringUtils;

public abstract class CommonCrawlServer extends RPCActorService {

  /** Shared commons-logging logger used throughout this class. */
  public static final Log          LOG                            = LogFactory
                                                                      .getLog(CommonCrawlServer.class);

  /**
   * Size of the default thread pool when
   * "org.commoncrawl.threadpool.max.threads" is not present in the configuration.
   */
  private static final int         DEFAULT_MAX_THREADPOOL_THREADS = 5;

  /**
   * Size of the DNS thread pool when "org.commoncrawl.dnsThreadPoolSize" is
   * not present in the configuration.
   */
  private static final int         DEFAULT_DNS_THREAD_POOL_SIZE   = 50;
  /** Id under which init() registers the thread pool handed to the event loop. */
  public static final String       DNS_POOL_NAME                  = "dns";

  /** Configuration passed to init(); exposed via getHadoopConfig() and getConfig(). */
  protected Configuration          _configuration;
  /** Most recently constructed server instance; assigned by the constructor. */
  private static CommonCrawlServer _serverSingleton;

  protected static final class CommonConfig {

    public String _className;
    public String _hostName;
    public String _rpcInterface;
    public int    _rpcPort           = -1;
    public String _webInterface;
    public int    _webPort           = -1;
    public String _configName;
    public String _dataDir           = null;
    public int    _dnsThreadPoolSize = -1;
    public boolean _dumpAppName = false;
  }

  /** Common command line options; assigned by main() from parseCommonConfig(). */
  protected static CommonConfig            _commonConfig;
  /** Host name read from "org.commoncrawl.hostname" or, failing that, from the local host lookup in init(). */
  protected String                         _hostName;
  /** RPC address built in init() from the selected RPC interface IP and the RPC port. */
  protected InetSocketAddress              _serverAddress;
  /** Local data directory validated (and created if missing) by init(). */
  protected File                           _dataDir;
  /** Web server created by init() and started by start(). */
  protected WebServer                      _webServer;
  /** Flag forwarded to the WebServer constructor in init(). */
  protected boolean                        _useAsyncWebDispatch  = false;
  /** DNS pool size; replaced in init() by the "org.commoncrawl.dnsThreadPoolSize" setting. */
  private int                              _dnsThreadPoolSize    = DEFAULT_DNS_THREAD_POOL_SIZE;
  /** Event loop created in init(), started in start() and stopped in stop(). */
  protected EventLoop                      _eventLoop            = null;
  /** default thread pool */
  private ExecutorService                  _defaultThreadPool;
  /** registered thread pools, keyed by pool id; guarded by synchronizing on the map itself */
  private TreeMap<String, ExecutorService> _threadPoolMap        = new TreeMap<String, ExecutorService>();
  /** detail logging; set from the --detailLogging command line option **/
  private static boolean                   _detailLoggingEnabled = false;
  /** filesystem access; obtained from FileSystem.get(conf) in init() **/
  FileSystem _fileSystem    = null;

  public CommonCrawlServer() {
    super(null);
    _serverSingleton = this;
  }

  private static ThreadLocal<CommonCrawlServer> _serverObject = new ThreadLocal<CommonCrawlServer>() {
                                                                @Override
                                                                protected CommonCrawlServer initialValue() {
                                                                  return _serverSingleton;
                                                                };
                                                              };

  public static CommonCrawlServer getServerSingleton() {
    return (CommonCrawlServer) _serverObject.get();
  }

  /**
   * Hook invoked by init() after argument parsing succeeded, giving a
   * subclass the chance to adjust the configuration. Does nothing by default.
   *
   * @param conf the configuration passed to init()
   */
  protected void overrideConfig(Configuration conf) {
  }

  /**
   * Parses the command line (init() passes the same argv that main()
   * received).
   *
   * @return false to make init() print the usage text and fail
   */
  protected abstract boolean parseArguements(String argv[]);

  /** @return fallback for "org.commoncrawl.rpcInterface" when it is not configured */
  protected abstract String getDefaultRPCInterface();

  /** @return fallback for "org.commoncrawl.rpcPort" when it is not configured */
  protected abstract int getDefaultRPCPort();

  /** @return fallback for "org.commoncrawl.httpInterface" when it is not configured */
  protected abstract String getDefaultHttpInterface();

  /** @return fallback for "org.commoncrawl.httpPort" when it is not configured */
  protected abstract int getDefaultHttpPort();

  /** @return the application name; main() prints it when --dumpAppName is given */
  protected abstract String getAppName();

  /** @return fallback for "org.commoncrawl.dataDir" when it is not configured */
  protected abstract String getDefaultDataDir();

  /** Prints usage help; called by init() when parseArguements() returns false. */
  protected abstract void printUsage();

  /**
   * Final initialization step, called by init() once the event loop and web
   * server objects have been created.
   *
   * @return the value init() reports to its caller
   */
  protected abstract boolean initServer();

  /**
   * Called by start() after the event loop has been started.
   *
   * @return true to let start() go on and start the RPC and web servers
   */
  protected abstract boolean startDaemons();

  /**
   * Called by stop() after the RPC server has been stopped and before the
   * event loop is stopped; main() also calls it directly on exit.
   */
  protected abstract void stopDaemons();

  public Configuration getHadoopConfig() {
    return _configuration;
  }

  public String getServerName() {
    return this.getClass().getSimpleName();
  }

  public EventLoop getEventLoop() {
    return _eventLoop;
  }

  public synchronized ExecutorService getDefaultThreadPool() {
    return _defaultThreadPool;
  }

  public ExecutorService registerThreadPool(String threadPoolId, int maxThreads) {
    ExecutorService service = null;
    synchronized (_threadPoolMap) {
      service = _threadPoolMap.get(threadPoolId);

      if (service == null) {
        service = Executors.newFixedThreadPool(maxThreads);
        _threadPoolMap.put(threadPoolId, service);
      }
    }
    return service;
  }

  public ExecutorService getThreadPool(String threadPoolId) {

    ExecutorService service = null;
    synchronized (_threadPoolMap) {
      service = _threadPoolMap.get(threadPoolId);
    }
    return service;
  }

  public void terminateDefaultThreadPool() {

    ExecutorService oldThreadPool = null;
    synchronized (this) {

      oldThreadPool = _defaultThreadPool;
      _defaultThreadPool = Executors.newFixedThreadPool(_configuration.getInt(
          "org.commoncrawl.threadpool.max.threads",
          DEFAULT_MAX_THREADPOOL_THREADS));

    }
    oldThreadPool.shutdown();
    try {
      while (!oldThreadPool.awaitTermination(1000, TimeUnit.MILLISECONDS)) {
        LOG.info("Awaiting shutdown for Default ThreadPool");
      }
    } catch (InterruptedException e) {
      LOG.error(CCStringUtils.stringifyException(e));
    }

  }

  public void terminateThreadPool(String threadPoolId) {
    ExecutorService service = null;
    synchronized (_threadPoolMap) {
      service = _threadPoolMap.remove(threadPoolId);

    }

    if (service != null) {
      LOG.info("Terminating ThreadPool:" + threadPoolId);
      service.shutdown();
      try {
        while (!service.awaitTermination(1000, TimeUnit.MILLISECONDS)) {
          LOG.info("Awaiting shutdown for ThreadPool:" + threadPoolId);
        }
      } catch (InterruptedException e) {
        LOG.error(CCStringUtils.stringifyException(e));
      }
    }
  }

  public void setAsyncWebDispatch(boolean asyncWebDispatch) {
    _useAsyncWebDispatch = asyncWebDispatch;
  }

  public FileSystem getFileSystem() {
    return _fileSystem;
  }
 
  public String getHostName() {
    return _hostName;
  }

  public InetSocketAddress getServerAddress() {
    return _serverAddress;
  }

  public File getDataDirectory() {
    return _dataDir;
  }

  public File getLogDirectory() {
    return new File(System.getProperty("commoncrawl.log.dir"));
  }

  public WebServer getWebServer() {
    return _webServer;
  }

  public static boolean detailLoggingEnabled() {
    return _detailLoggingEnabled;
  }

  protected final boolean init(String argv[], Configuration conf)
      throws IOException {

    _configuration = conf;

    _fileSystem = FileSystem.get(_configuration);
   
    LOG.info("Hadoop FileSystem is:" + _fileSystem.getUri().toString());
   
    if (!parseArguements(argv)) {
      printUsage();
      return false;
    }

    overrideConfig(conf);

    // initialize the default thread pool
    _defaultThreadPool = Executors.newFixedThreadPool(_configuration.getInt(
        "org.commoncrawl.threadpool.max.threads",
        DEFAULT_MAX_THREADPOOL_THREADS));

    _hostName = _configuration.get("org.commoncrawl.hostname");

    if (_hostName == null) {

      // get host name via other meands
      InetAddress localHostAddr = InetAddress.getLocalHost();
      _hostName = localHostAddr.getHostName();
      _configuration.set("org.commoncrawl.hostname", _hostName);
    }
    if (detailLoggingEnabled())
      LOG.info("Hostname is: " + _hostName);

    String rpcInterface = _configuration.get("org.commoncrawl.rpcInterface",
        getDefaultRPCInterface());
    String webInterface = _configuration.get("org.commoncrawl.httpInterface",
        getDefaultHttpInterface());
    int rpcPort = _configuration.getInt("org.commoncrawl.rpcPort",
        getDefaultRPCPort());
    int httpPort = _configuration.getInt("org.commoncrawl.httpPort",
        getDefaultHttpPort());
    String dataDirectory = _configuration.get("org.commoncrawl.dataDir",
        getDefaultDataDir());
    _dnsThreadPoolSize = _configuration.getInt(
        "org.commoncrawl.dnsThreadPoolSize", DEFAULT_DNS_THREAD_POOL_SIZE);

    // validate data directory
    _dataDir = new File(dataDirectory);

    if (!_dataDir.exists()) {
      if (!_dataDir.mkdirs()) {
        System.out.println("Unable to create data directory: " + dataDirectory);
        return false;
      }
    } else if (!_dataDir.isDirectory()) {
      System.out.println("Invalid data directory:" + dataDirectory);
      return false;
    }

    // update properties ...
    LOG.info("Config says rpcInterface is:" + rpcInterface);
    conf.set("org.commoncrawl.rpcInterface", rpcInterface);

    LOG.info("Config says httpInterface is:" + webInterface);
    conf.set("org.commoncrawl.httpInterface", webInterface);

    LOG.info("Config says rpcPort is:" + rpcPort);
    conf.setInt("org.commoncrawl.rpcPort", rpcPort);

    LOG.info("Config says httpPort is:" + httpPort);
    conf.setInt("org.commoncrawl.httpPort", httpPort);

    LOG.info("Config says dataDir  is:" + dataDirectory);
    conf.set("org.commoncrawl.dataDir", dataDirectory);

    // extract ip address for rpc / web interfaces
    String rpcIPS[] = getIPs(rpcInterface);
    String webIPS[] = getIPs(webInterface);

    if (rpcIPS == null || rpcIPS.length == 0) {
      LOG
          .error("No Valid IP Addresses found for RPC Interface:"
              + rpcInterface);
      return false;
    }

    if (webIPS == null || webIPS.length == 0) {
      LOG
          .error("No Valid IP Addresses found for Web Interface:"
              + webInterface);
      return false;
    }

    String selectedRPCInterface = rpcIPS[0];
    if (rpcIPS.length > 1) {
      for (String rpcIPAddress : rpcIPS) {
        if (!rpcIPAddress.endsWith(".1")) {
          selectedRPCInterface = rpcIPAddress;
          break;
        }
      }
    }

    String selectedWebInterface = webIPS[0];
    if (webIPS.length > 1) {
      for (String webIPAddress : rpcIPS) {
        if (!webIPAddress.endsWith(".1")) {
          selectedWebInterface = webIPAddress;
          break;
        }
      }
    }

    LOG.info("RPC Interface translates to IP:" + selectedRPCInterface);
    LOG.info("Web Interface translates to IP:" + selectedWebInterface);

    // initialize the server address
    _serverAddress = new InetSocketAddress(selectedRPCInterface, rpcPort);

    // init the event loop
    _eventLoop = new EventLoop(registerThreadPool(DNS_POOL_NAME,
        _dnsThreadPoolSize));

    // and the Web Server
    _webServer = new WebServer(this, selectedWebInterface, httpPort, false,
        _useAsyncWebDispatch);
    _webServer.setAttribute("commoncrawl.server", this);

    // initialize the underlying server
    if (initServer()) {

      // log the startup time
      LOG.info(_commonConfig._className + " up at: " + _serverAddress);

      return true;
    }
    return false;
  }

  public void start() throws IOException {
    // add a shutdown hook ...
    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {

      @Override
      public void run() {
        stop();
      }

    }));

    _eventLoop.start();

    if (startDaemons()) {
      // start RPC server
      super.start();
      // and finally start web server ...
      _webServer.start();
    }
  }

  private void join() throws InterruptedException {
    if (_eventLoop.getEventThread() != null) {
      _eventLoop.getEventThread().join();
    }
  }

  /** called to initiate an orderly shutdown of the service **/
  public void stop() {

    // stop RPC Server
    super.stop();
    /*
     * try { // and web server _webServer.stop(); } catch (InterruptedException
     * e) {
     *
     * }
     */
    // and finally stop daemons
    stopDaemons();

    _eventLoop.stop();
  }

  public static CommonConfig parseCommonConfig(String argv[]) {

    CommonConfig configOut = new CommonConfig();
    if (argv != null) {
      for (int i = 0; i < argv.length; ++i) {
        if (argv[i].equalsIgnoreCase("--server")) {
          if (i + 1 < argv.length) {
            configOut._className = argv[++i];
          }
        } else if (argv[i].equalsIgnoreCase("--hostname")) {
          if (i + 1 < argv.length) {
            configOut._hostName = argv[++i];
          }
        } else if (argv[i].equalsIgnoreCase("--rpcIntfc")) {
          if (i + 1 < argv.length) {
            configOut._rpcInterface = argv[++i];
          }
        } else if (argv[i].equalsIgnoreCase("--httpIntfc")) {
          if (i + 1 < argv.length) {
            configOut._webInterface = argv[++i];
          }
        } else if (argv[i].equalsIgnoreCase("--rpcPort")) {
          if (i + 1 < argv.length) {
            configOut._rpcPort = Integer.parseInt(argv[++i]);
          }
        } else if (argv[i].equalsIgnoreCase("--httpPort")) {
          if (i + 1 < argv.length) {
            configOut._webPort = Integer.parseInt(argv[++i]);
          }
        } else if (argv[i].equalsIgnoreCase("--conf")) {
          if (i + 1 < argv.length) {
            configOut._configName = argv[++i];
          }
        } else if (argv[i].equalsIgnoreCase("--dataDir")) {
          if (i + 1 < argv.length) {
            configOut._dataDir = argv[++i];
          }
        } else if (argv[i].equalsIgnoreCase("--dnsPoolSize")) {
          configOut._dnsThreadPoolSize = Integer.parseInt(argv[++i]);
        }

        else if (argv[i].equalsIgnoreCase("--detailLogging")) {
          boolean enabled = Boolean.parseBoolean(argv[++i]);
          _detailLoggingEnabled = enabled;
        }
        else if (argv[i].equalsIgnoreCase("--dumpAppName")) {
          configOut._dumpAppName = true;
        }
      }
    }
    return configOut;
  }

  private static void printCommonUsage() {
    System.err
        .println("Usage: java CommonCrawlServer --server [server type] --hostname [hostname] --rpcInterface [rpc server interface] --rpcPort [rpc server port] --httpInterface [web server interface] --httpPort [web server port] --dataDir [data directory] --conf [config file] [server args]");

  }

  public static void main(String argv[]) throws Exception {

    try {
      Configuration conf = new Configuration();

      conf.addResource("core-site.xml");
      conf.addResource("mapred-site.xml");
      conf.addResource("hdfs-site.xml");

      _commonConfig = parseCommonConfig(argv);

      if (_commonConfig._dumpAppName) {
        if (_commonConfig._className == null) {
          System.out.println("");
          return;
        }
        else  {
          try {
            // instantiate server ...
            Class theClass = conf.getClassByName(_commonConfig._className);
            Object serverInstance = theClass.newInstance();
            CommonCrawlServer server = CommonCrawlServer.class.cast(serverInstance);
            // write out preferred log file name
            System.out.println(server.getAppName());
          }
          catch (Exception e) {
            System.err.println("Failed to Instatiate Server with Error:" + CCStringUtils.stringifyException(e));
          }
          return;
        }
      }
      if (_commonConfig._className == null) {
        printCommonUsage();
        return;
      }
      if (_commonConfig._configName != null) {
        LOG.info("Processing Config File:" + _commonConfig._configName);
        conf.addResource(_commonConfig._configName);
      }

      if (_commonConfig._hostName != null) {
        conf.set("org.commoncrawl.hostname", _commonConfig._hostName);
      }

      if (_commonConfig._rpcInterface != null) {
        conf.set("org.commoncrawl.rpcInterface", _commonConfig._rpcInterface);
      }

      if (_commonConfig._webInterface != null) {
        conf.set("org.commoncrawl.httpInterface", _commonConfig._webInterface);
      }

      if (_commonConfig._rpcPort != -1) {
        conf.setInt("org.commoncrawl.rpcPort", _commonConfig._rpcPort);
      }
      if (_commonConfig._webPort != -1) {
        conf.setInt("org.commoncrawl.httpPort", _commonConfig._webPort);
      }
      if (_commonConfig._dataDir != null) {
        conf.set("org.commoncrawl.dataDir", _commonConfig._dataDir);
      }
      if (_commonConfig._dnsThreadPoolSize != -1) {
        conf.setInt("org.commoncrawl.dnsThreadPoolSize",
            _commonConfig._dnsThreadPoolSize);
      }
      LOG.info("Log File Is:" + System.getProperty("commoncrawl.log.file"));

      LOG.info("Instantiating Class:" + _commonConfig._className);
      Class theClass = conf.getClassByName(_commonConfig._className);

      Object serverInstance = theClass.newInstance();
      CommonCrawlServer server = CommonCrawlServer.class.cast(serverInstance);

      StringUtils.startupShutdownMessage(theClass, argv, LOG);

      if (server.init(argv, conf)) {
        try {

          server.start();
          server.join();

        } catch (IOException e) {
          LOG.error(CCStringUtils.stringifyException(e));
          throw e;
        } finally {
          server.stopDaemons();
          server.stop();
        }
      }

    } catch (Throwable e) {
      LOG.error(StringUtils.stringifyException(e));
      e.printStackTrace();
      System.exit(-1);
    }
  }

  public Configuration getConfig() {
    return _configuration;
  }

  /** return the ip address for a given network interface **/
  public static String[] getIPs(String strInterface)
      throws UnknownHostException {

    Pattern pattern = Pattern.compile("([0-9]+).([0-9]+).([0-9]+).([0-9]+)");

    Matcher m = pattern.matcher(strInterface);

    // if this is an ip address ...
    if (m.matches()) {
      LOG.info("getIPs detected ip address as interface name. returned: "
          + strInterface);
      return new String[] { strInterface };
    } else {
      if (strInterface.equalsIgnoreCase("localhost")
          || strInterface.equalsIgnoreCase("lo")) {
        LOG.info("getIPs for localhost name:" + strInterface
            + " returned: 127.0.0.1");
        return new String[] { "127.0.0.1" };
      }

      try {

        NetworkInterface netIF = NetworkInterface.getByName(strInterface);

        if (netIF == null) {
          return null;
        } else {
          Vector<String> ips = new Vector<String>();

          Enumeration<InetAddress> e = netIF.getInetAddresses();

          while (e.hasMoreElements()) {

            InetAddress address = e.nextElement();
            // only allow ipv4 addresses for now ...
            if (address.getAddress().length == 4) {
              LOG.info("getIPs for name:" + strInterface + " found:"
                  + address.getHostAddress());
              ips.add(address.getHostAddress());
            }
          }
          return ips.toArray(new String[] {});
        }
      } catch (SocketException e) {
        return null;
      }
    }
  }

  /**
   * dispatch an async web request on the server's main event loop thread
   *
   * @param request
   *          object
   * @throws IOException
   */
  public void dispatchAsyncWebRequest(final AsyncWebServerRequest request)
      throws IOException {
    request.dispatch(_eventLoop);
    if (request.getException() != null) {
      throw request.getException();
    }
  }
}
TOP

Related Classes of org.commoncrawl.server.CommonCrawlServer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.