Package org.archive.wayback.replay

Source Code of org.archive.wayback.replay.TextDocument$MMSToHTTPResultURIConverter

/*
*  This file is part of the Wayback archival access software
*   (http://archive-access.sourceforge.net/projects/wayback/).
*
*  Licensed to the Internet Archive (IA) by one or more individual
*  contributors.
*
*  The IA licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/
package org.archive.wayback.replay;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.archive.wayback.ResultURIConverter;
import org.archive.wayback.core.Resource;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.CaptureSearchResults;
import org.archive.wayback.core.UIResults;
import org.archive.wayback.core.WaybackRequest;

/**
* Class which wraps functionality for converting a Resource(InputStream +
* HTTP headers) into a StringBuilder, performing several common URL
* resolution methods against that StringBuilder, inserting arbitrary Strings
* into the page, and then converting the page back to a byte array.
*
* @author brad
* @version $Date$, $Revision$
*/
public class TextDocument {
  // if documents are marked up before sending to clients, the data is
  // decoded into a String in chunks. This is how big a chunk to decode with.
  private final static int C_BUFFER_SIZE = 4096;

  private Resource resource = null;
  private CaptureSearchResult result = null;
  private ResultURIConverter uriConverter = null;
  /**
   * the internal StringBuilder
   */
  public StringBuilder sb = null;
  private String charSet = null;
  private byte[] resultBytes = null;

  /**
   * @param resource
   * @param result
   * @param uriConverter
   */
  public TextDocument(Resource resource, CaptureSearchResult result,
      ResultURIConverter uriConverter) {
    this.resource = resource;
    this.result = result;
    this.uriConverter = uriConverter;
  }

  public void addBase() {

    // TODO: get url from Resource instead of SearchResult?
    String pageUrl = result.getOriginalUrl();
    String captureDate = result.getCaptureTimestamp();

    String existingBaseHref = TagMagix.getBaseHref(sb);
    if (existingBaseHref == null) {
      insertAtStartOfHead("<base href=\"" + pageUrl + "\" />");
    } else {
      pageUrl = existingBaseHref;
    }
   }

  /**
   * Update URLs inside the page, so those URLs which must be correct at
   * page load time resolve correctly to absolute URLs.
   *
   * This means ensuring there is a BASE HREF tag, adding one if missing,
   * and then resolving:
   *     FRAME-SRC, META-URL, LINK-HREF, SCRIPT-SRC
   * tag-attribute pairs against either the existing BASE-HREF, or the
   * page's absolute URL if it was missing.
   */
  public void resolvePageUrls() {

    // TODO: get url from Resource instead of SearchResult?
    String pageUrl = result.getOriginalUrl();
    String captureDate = result.getCaptureTimestamp();

    String existingBaseHref = TagMagix.getBaseHref(sb);
    if (existingBaseHref == null) {
      insertAtStartOfHead("<base href=\"" + pageUrl + "\" />");
    } else {
      pageUrl = existingBaseHref;
    }

    String markups[][] = {
        {"FRAME","SRC"},
        {"META","URL"},
        {"LINK","HREF"},
        {"SCRIPT","SRC"},
        {TagMagix.ANY_TAGNAME,"background"}
    };
    // TODO: The classic WM added a js_ to the datespec, so NotInArchives
    // can return an valid javascript doc, and not cause Javascript errors.
    for(String tagAttr[] : markups) {
      TagMagix.markupTagREURIC(sb, uriConverter, captureDate, pageUrl,
          tagAttr[0], tagAttr[1]);
    }
    TagMagix.markupCSSImports(sb,uriConverter, captureDate, pageUrl);
    TagMagix.markupStyleUrls(sb,uriConverter,captureDate,pageUrl);
  }
 
  /**
   * Update all URLs inside the page, so they resolve correctly to absolute
   * URLs within the Wayback service.
   */
  public void resolveAllPageUrls() {

    // TODO: get url from Resource instead of SearchResult?
    String pageUrl = result.getOriginalUrl();
    String captureDate = result.getCaptureTimestamp();

    String existingBaseHref = TagMagix.getBaseHref(sb);
    if (existingBaseHref != null) {
      pageUrl = existingBaseHref;
    }
    ResultURIConverter ruc = new SpecialResultURIConverter(uriConverter);
   
    // TODO: forms...?
    String markups[][] = {
        {"FRAME","SRC"},
        {"META","URL"},
        {"LINK","HREF"},
        {"SCRIPT","SRC"},
        {"IMG","SRC"},
        {"A","HREF"},
        {"AREA","HREF"},
        {"OBJECT","CODEBASE"},
        {"OBJECT","CDATA"},
        {"APPLET","CODEBASE"},
        {"APPLET","ARCHIVE"},
        {"EMBED","SRC"},
        {"IFRAME","SRC"},
        {TagMagix.ANY_TAGNAME,"background"}
    };
    for(String tagAttr[] : markups) {
      TagMagix.markupTagREURIC(sb, ruc, captureDate, pageUrl,
          tagAttr[0], tagAttr[1]);
    }
    TagMagix.markupCSSImports(sb,uriConverter, captureDate, pageUrl);
    TagMagix.markupStyleUrls(sb,uriConverter,captureDate,pageUrl);
  }
 
  public void resolveCSSUrls() {
    // TODO: get url from Resource instead of SearchResult?
    String pageUrl = result.getOriginalUrl();
    String captureDate = result.getCaptureTimestamp();
    TagMagix.markupCSSImports(sb,uriConverter, captureDate, pageUrl);
  }

  public void resolveASXRefUrls() {

    // TODO: get url from Resource instead of SearchResult?
    String pageUrl = result.getOriginalUrl();
    String captureDate = result.getCaptureTimestamp();
    ResultURIConverter ruc = new MMSToHTTPResultURIConverter(uriConverter);
   
    TagMagix.markupTagREURIC(sb, ruc, captureDate, pageUrl,
        "REF", "HREF");
  }
 
  public void stripHTML() {
    String stripped = sb.toString().replaceAll("\\<.*?>","");
    sb.setLength(0);
    sb.append(stripped);
  }
  /**
   * @param charSet
   * @throws IOException
   */
  public void readFully(String charSet) throws IOException {
    this.charSet = charSet;
    int recordLength = (int) resource.getRecordLength();

    // convert bytes to characters for charset:
    InputStreamReader isr = new InputStreamReader(resource, charSet);

    char[] cbuffer = new char[C_BUFFER_SIZE];

    // slurp the whole thing into RAM:
    sb = new StringBuilder(recordLength);
   
    //Skip the UTF-8 BOM 0xFEFF
    int firstChar = isr.read();
    if ((firstChar != '\uFEFF') && (firstChar != -1)) {
      sb.append((char)firstChar);
    }
   
    for (int r = -1; (r = isr.read(cbuffer, 0, C_BUFFER_SIZE)) != -1;) {
      sb.append(cbuffer, 0, r);
    }
  }
   
  /**
   * Read bytes from input stream, using best-guess for character encoding
   * @throws IOException
   */
  public void readFully() throws IOException {
    readFully(null);
  }
 
  /**
   * @return raw bytes contained in internal StringBuilder
   * @throws UnsupportedEncodingException
   */
  public byte[] getBytes() throws UnsupportedEncodingException {
    if(resultBytes != null) {
      return resultBytes;
    }
    if(sb == null) {
      throw new IllegalStateException("No interal StringBuffer");
    }
    if(resultBytes == null) {
      resultBytes = sb.toString().getBytes(charSet);
    }
    return resultBytes;
  }
 
  public void setResultBytes(byte[] resultBytes) {
    this.resultBytes = resultBytes;
  }
 
  /**
   * Write the contents of the page to the client.
   *
   * @param os
   * @throws IOException
   */
  public void writeToOutputStream(OutputStream os) throws IOException {
    if(sb == null) {
      throw new IllegalStateException("No interal StringBuffer");
    }
    byte[] b;
    try {
      b = getBytes();
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);
    }
    os.write(b);
  }

  /**
   * @param toInsert
   */ 
  public void insertAtStartOfDocument(String toInsert) {
    sb.insert(0,toInsert);
  }

  /**
   * @param toInsert
   */ 
  public void insertAtEndOfDocument( String toInsert ) {
    sb.append( "\n" + toInsert );
  }

  /**
   * @param toInsert
   */ 
  public void insertAtStartOfHead(String toInsert) {
    int insertPoint = TagMagix.getEndOfFirstTag(sb,"head");
    if (-1 == insertPoint) {
      insertPoint = 0;
    }
    sb.insert(insertPoint,toInsert);
  }

  /**
   * @param toInsert
   */
  public void insertAtEndOfBody(String toInsert) {
    int insertPoint = sb.lastIndexOf("</body>");
    if (-1 == insertPoint) {
      insertPoint = sb.lastIndexOf("</BODY>");
    }
    if (-1 == insertPoint) {
      insertPoint = sb.length();
    }
    sb.insert(insertPoint,toInsert);
  }
  /**
   * @param toInsert
   */
  public void insertAtStartOfBody(String toInsert) {
    int insertPoint = TagMagix.getEndOfFirstTag(sb,"body");
    if (-1 == insertPoint) {
      insertPoint = 0;
    }
    sb.insert(insertPoint,toInsert);
  }
       
  /**
   * @param jspPath
   * @param httpRequest
   * @param httpResponse
   * @param wbRequest
   * @param results
         * @param result
         * @param resource
   * @return
         *
   * @throws IOException
   * @throws ServletException
   */
  public String includeJspString(String jspPath,
      HttpServletRequest httpRequest, HttpServletResponse httpResponse,
      WaybackRequest wbRequest, CaptureSearchResults results,
      CaptureSearchResult result, Resource resource)
  throws ServletException, IOException {
   
    if (wbRequest.isAjaxRequest()) {
      return "";
    }
   
    UIResults uiResults = new UIResults(wbRequest,uriConverter,results,
        result,resource);

    StringHttpServletResponseWrapper wrappedResponse =
      new StringHttpServletResponseWrapper(httpResponse);
    uiResults.forward(httpRequest, wrappedResponse, jspPath);
    return wrappedResponse.getStringResponse();
  }
 
  /**
   * @param jsUrl The javascript URL to be wrapped
   * @return A <code>&ltscript&gt</code> tag containing the provided javascript URL.
   */
  public String getJSIncludeString(final String jsUrl) {
    return "<script type=\"text/javascript\" src=\""
      + jsUrl + "\" ></script>\n";
  }

  /**
   * @return the charSet
   */
  public String getCharSet() {
    return charSet;
  }

  /**
   * @param charSet the charSet to set
   */
  public void setCharSet(String charSet) {
    this.charSet = charSet;
  }

  private class SpecialResultURIConverter implements ResultURIConverter {
    private static final String EMAIL_PROTOCOL_PREFIX = "mailto:";
    private static final String JAVASCRIPT_PROTOCOL_PREFIX = "javascript:";
    private ResultURIConverter base = null;
               
    public SpecialResultURIConverter(ResultURIConverter base) {
      this.base = base;
    }
               
                @Override
    public String makeReplayURI(String datespec, String url) {
      if(url.startsWith(EMAIL_PROTOCOL_PREFIX)) {
        return url;
      }
      if(url.startsWith(JAVASCRIPT_PROTOCOL_PREFIX)) {
        return url;
      }
      return base.makeReplayURI(datespec, url);
    }
  }

  private class MMSToHTTPResultURIConverter implements ResultURIConverter {
    private static final String MMS_PROTOCOL_PREFIX = "mms://";
    private static final String HTTP_PROTOCOL_PREFIX = "http://";
    private ResultURIConverter base = null;

                public MMSToHTTPResultURIConverter(ResultURIConverter base) {
      this.base = base;
    }
               
                @Override
    public String makeReplayURI(String datespec, String url) {
      if(url.startsWith(MMS_PROTOCOL_PREFIX)) {
        url = HTTP_PROTOCOL_PREFIX +
          url.substring(MMS_PROTOCOL_PREFIX.length());
      }
      return base.makeReplayURI(datespec, url);
    }
  } 
}
TOP

Related Classes of org.archive.wayback.replay.TextDocument$MMSToHTTPResultURIConverter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.