Package com.subgraph.vega.internal.analysis

Source Code of com.subgraph.vega.internal.analysis.ContentAnalyzer

/*******************************************************************************
* Copyright (c) 2011 Subgraph.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*     Subgraph - initial API and implementation
******************************************************************************/
package com.subgraph.vega.internal.analysis;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

import org.apache.http.HttpRequest;

import com.subgraph.vega.api.analysis.IContentAnalyzer;
import com.subgraph.vega.api.analysis.IContentAnalyzerResult;
import com.subgraph.vega.api.analysis.MimeType;
import com.subgraph.vega.api.http.requests.IHttpResponse;
import com.subgraph.vega.api.model.IWorkspace;
import com.subgraph.vega.api.model.alerts.IScanInstance;
import com.subgraph.vega.api.model.web.IWebModel;
import com.subgraph.vega.api.model.web.IWebPath;
import com.subgraph.vega.api.scanner.modules.IResponseProcessingModule;
import com.subgraph.vega.api.util.VegaURI;
import com.subgraph.vega.internal.analysis.urls.UrlExtractor;

public class ContentAnalyzer implements IContentAnalyzer {
 
  private final Logger logger = Logger.getLogger("analysis");
 
  private final IScanInstance scanInstance;
  private final ContentAnalyzerFactory factory;
  private final UrlExtractor urlExtractor = new UrlExtractor();
  private final MimeDetector mimeDetector = new MimeDetector();

  private final Object responseProcessingLock = new Object();

  private List<IResponseProcessingModule> responseProcessingModules;
  private boolean addLinksToModel;
  private boolean defaultAddToRequestLog;
   
  ContentAnalyzer(ContentAnalyzerFactory factory, IScanInstance scanInstance) {
    this.factory = factory;
    this.scanInstance = scanInstance;
    this.addLinksToModel = true;
    this.defaultAddToRequestLog = true;
  }

  @Override
  public IContentAnalyzerResult processResponse(IHttpResponse response) {
    return processResponse(response, defaultAddToRequestLog, true);
  }

  @Override
  public void setDefaultAddToRequestLog(boolean flag) {
    defaultAddToRequestLog = flag;   
  }

  @Override
  public IContentAnalyzerResult processResponse(IHttpResponse response, boolean addToRequestLog, boolean scrapePage) {
    final ContentAnalyzerResult result = new ContentAnalyzerResult();
    if(response == null) {
      logger.warning("ContentAnalyzer.processResponse() called with null response");
      return result;
    }

    final IWorkspace workspace = factory.getCurrentWorkspace();
    if(workspace == null) {
      logger.warning("ContentAnalyzer.processResponse() called while no workspace is active");
      return result;
    }
   
    if(addToRequestLog) {
      workspace.getRequestLog().addRequestResponse(response);
    }

    final VegaURI uri = VegaURI.fromHostAndRequest(response.getHost(), response.getOriginalRequest());
    final IWebPath path = workspace.getWebModel().getWebPathByUri(uri);
    path.setVisited(true);
   
    result.setDeclaredMimeType(mimeDetector.getDeclaredMimeType(response));
    result.setSniffedMimeType(mimeDetector.getSniffedMimeType(response));
   
    final String mimeType = getBestMimeType(result);
    if(mimeType != null && path.getMimeType() == null) {
      path.setMimeType(mimeType);
    }
   
    if(scrapePage)
      runExtractUrls(result, response, workspace.getWebModel());
    runResponseProcessingModules(response.getOriginalRequest(), response, result.getDeclaredMimeType(), result.getSniffedMimeType(), workspace);
    return result;
  }
 
  private String getBestMimeType(IContentAnalyzerResult result) {
    if(result.getSniffedMimeType() != MimeType.MIME_NONE) {
      return result.getSniffedMimeType().getCanonicalName();
    } else if(result.getDeclaredMimeType() != MimeType.MIME_NONE) {
      return result.getDeclaredMimeType().getCanonicalName();
    } else {
      return null;
    }
  }

  @Override
  public void setResponseProcessingModules(List<IResponseProcessingModule> modules) {
    responseProcessingModules = new ArrayList<IResponseProcessingModule>(modules);
  }
 
  private void runExtractUrls(ContentAnalyzerResult result, IHttpResponse response, IWebModel webModel) {
    if(response.isMostlyAscii()) {
      for(VegaURI u : urlExtractor.findUrls(response)) {
        if(addLinksToModel && (schemeEquals(u, "http") || schemeEquals(u, "https")))
          webModel.getWebPathByUri(u);
        result.addUri(u);
      }
    }
  }
 
  private boolean schemeEquals(VegaURI uri, String scheme) {
    final String s = uri.getTargetHost().getSchemeName();
    return s.equalsIgnoreCase(scheme);
  }
 
  private void runResponseProcessingModules(HttpRequest request, IHttpResponse response, MimeType declaredMime, MimeType sniffedMime, IWorkspace workspace) {
    if(responseProcessingModules == null || !response.isMostlyAscii()) {
      return;
    }

    if(!(isDefaultResponseProcessingMimetype(declaredMime) || isDefaultResponseProcessingMimetype(sniffedMime))) {
      return;
    }
   
    synchronized (responseProcessingLock) {
      for(IResponseProcessingModule m: responseProcessingModules) {
        if(m.isEnabled()) {
          m.processResponse(scanInstance, request, response, workspace);
        }
      }
    }
  }

  private boolean isDefaultResponseProcessingMimetype(MimeType mime) {
    final String name = mime.getCanonicalName();
    return (name.contains("text") || name.contains("javascript") || name.contains("xml"));
  }

  @Override
  public void setAddLinksToModel(boolean flag) {
    addLinksToModel = flag;
  }
}
TOP

Related Classes of com.subgraph.vega.internal.analysis.ContentAnalyzer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.