Package com.subgraph.vega.impl.scanner.urls

Source Code of com.subgraph.vega.impl.scanner.urls.UriParser

/*******************************************************************************
* Copyright (c) 2011 Subgraph.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*     Subgraph - initial API and implementation
******************************************************************************/
package com.subgraph.vega.impl.scanner.urls;

import java.util.List;

import org.apache.http.Consts;
import org.apache.http.HttpHost;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;

import com.subgraph.vega.api.analysis.IContentAnalyzer;
import com.subgraph.vega.api.crawler.ICrawlerResponseProcessor;
import com.subgraph.vega.api.crawler.IWebCrawler;
import com.subgraph.vega.api.model.IWorkspace;
import com.subgraph.vega.api.model.alerts.IScanInstance;
import com.subgraph.vega.api.model.web.IWebHost;
import com.subgraph.vega.api.model.web.IWebModel;
import com.subgraph.vega.api.model.web.IWebPath;
import com.subgraph.vega.api.model.web.IWebPath.PathType;
import com.subgraph.vega.api.scanner.IPathState;
import com.subgraph.vega.api.scanner.IScannerConfig;
import com.subgraph.vega.api.scanner.modules.IBasicModuleScript;
import com.subgraph.vega.api.util.VegaURI;
import com.subgraph.vega.impl.scanner.handlers.DirectoryProcessor;
import com.subgraph.vega.impl.scanner.handlers.FileProcessor;
import com.subgraph.vega.impl.scanner.handlers.UnknownProcessor;
import com.subgraph.vega.impl.scanner.state.PathState;
import com.subgraph.vega.impl.scanner.state.PathStateManager;

public class UriParser {
  private final IWorkspace workspace;
  private final ICrawlerResponseProcessor directoryProcessor;
  private final ICrawlerResponseProcessor fileProcessor;
  private final ICrawlerResponseProcessor unknownProcessor;
  private final PathStateManager pathStateManager;

  public UriParser(IScannerConfig config, List<IBasicModuleScript> injectionModules, IWorkspace workspace, IWebCrawler crawler, UriFilter filter, IContentAnalyzer contentAnalyzer, IScanInstance scanInstance, boolean isProxyScan) {
    this.workspace = workspace;
    this.directoryProcessor = new DirectoryProcessor();
    this.fileProcessor = new FileProcessor();
    this.unknownProcessor = new UnknownProcessor();
    this.pathStateManager = new PathStateManager(config, injectionModules, workspace, crawler, new ResponseAnalyzer(config, contentAnalyzer, this, filter, isProxyScan), scanInstance, isProxyScan);
  }

  public void updateInjectionModules(List<IBasicModuleScript> injectionModules) {
    pathStateManager.setInjectionModules(injectionModules);
  }

  public IPathState processUri(VegaURI uri) {
    final IWebHost webHost = getWebHost(uri.getTargetHost());
    final IWebPath rootPath = webHost.getRootPath();
    IWebPath path = rootPath;
    final boolean hasTrailingSlash = uri.getPath().endsWith("/");

    String[] parts = uri.getPath().split("/");
    IWebPath childPath = null;
    for(int i = 1; i < parts.length; i++) {
      synchronized(path) {
        childPath = path.getChildPath(parts[i]);
        if(childPath == null) {
          childPath = path.addChildPath(parts[i]);
        }
        processPath(childPath, uri, (i == (parts.length - 1)), hasTrailingSlash);
      }
      path = childPath;
    }
   
    if(path == rootPath && uri.getQuery() != null) {
      final PathState ps = pathStateManager.getStateForPath(rootPath);
      synchronized (pathStateManager) {
        ps.maybeAddParameters(URLEncodedUtils.parse(uri.getQuery(), Consts.UTF_8));
      }
    }
    return pathStateManager.getStateForPath(path);
  }

  private IWebHost getWebHost(HttpHost host) {
    IWebHost webHost = null;
    synchronized(workspace) {
      final IWebModel webModel = workspace.getWebModel();
      webHost = webModel.getWebHostByHttpHost(host);
      if(webHost == null)
        webHost = webModel.createWebHostFromHttpHost(host);
    }
    processWebHost(webHost);
    return webHost;
  }

  private void processWebHost(IWebHost webHost) {
    final IWebPath rootPath = webHost.getRootPath();
    synchronized(pathStateManager) {
      if(!pathStateManager.hasSeenPath(rootPath)) {
        pathStateManager.createStateForPath(rootPath, directoryProcessor);
      }
    }
  }

  private void processPath(IWebPath webPath, VegaURI uri, boolean isLast, boolean hasTrailingSlash) {
    if(!isLast || (isLast && hasTrailingSlash)) {
      processDirectory(webPath, uri);
    } else if(uri.getQuery() != null) {
      webPath.setPathType(PathType.PATH_FILE);
      processPathWithQuery(webPath, uri);
    } else {
      processUnknown(webPath);
    }
  }

  private void processDirectory(IWebPath webPath, VegaURI uri) {
    synchronized(pathStateManager) {
      if(!pathStateManager.hasSeenPath(webPath)) {
        webPath.setPathType(PathType.PATH_DIRECTORY);
        final PathState ps = pathStateManager.createStateForPath(webPath, directoryProcessor);
        if(uri.getQuery() != null) {
          ps.maybeAddParameters(URLEncodedUtils.parse(uri.getQuery(), Consts.UTF_8));
        }
        return;
      }
      if(webPath.getPathType() != PathType.PATH_DIRECTORY) {
        // XXX What to do in this case?  The PathState node may be executing PATH_UNKNOWN checks already
        System.out.println("Scan state node for path="+ webPath + " already exists but it's not a directory in UriParser.processDirectory()");
      }
    }
  }


  private void processPathWithQuery(IWebPath path, VegaURI uri) {
    path.setPathType(PathType.PATH_FILE);
    List<NameValuePair> plist = URLEncodedUtils.parse(uri.getQuery(), Consts.UTF_8);
    synchronized(pathStateManager) {
      final PathState ps = getPathStateForFile(path);
      if(ps != null) {
        ps.maybeAddParameters(plist);
      }
    }
  }

  private PathState getPathStateForFile(IWebPath filePath) {
    synchronized(pathStateManager) {
      if(pathStateManager.hasSeenPath(filePath))
        return pathStateManager.getStateForPath(filePath);
      else
        return pathStateManager.createStateForPath(filePath, fileProcessor);
    }
  }

  private void processUnknown(IWebPath path) {
    synchronized(pathStateManager) {
      if(pathStateManager.hasSeenPath(path))
        return;
      pathStateManager.createStateForPath(path, unknownProcessor);
    }
  }

}
TOP

Related Classes of com.subgraph.vega.impl.scanner.urls.UriParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.