/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.modules.fetcher;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Logger;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import junit.framework.TestCase;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.io.IOUtils;
import org.archive.crawler.prefetch.PreconditionEnforcer;
import org.archive.modules.CrawlMetadata;
import org.archive.modules.CrawlURI;
import org.archive.modules.CrawlURI.FetchType;
import org.archive.modules.credential.HtmlFormCredential;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.Recorder;
import org.archive.util.TmpDirTestCase;
import org.mortbay.jetty.NCSARequestLog;
import org.mortbay.jetty.Request;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.bio.SocketConnector;
import org.mortbay.jetty.handler.HandlerCollection;
import org.mortbay.jetty.handler.RequestLogHandler;
import org.mortbay.jetty.security.Authenticator;
import org.mortbay.jetty.security.Constraint;
import org.mortbay.jetty.security.ConstraintMapping;
import org.mortbay.jetty.security.FormAuthenticator;
import org.mortbay.jetty.security.HashUserRealm;
import org.mortbay.jetty.security.SecurityHandler;
import org.mortbay.jetty.servlet.HashSessionManager;
import org.mortbay.jetty.servlet.SessionHandler;
/* Somewhat redundant to org.archive.crawler.selftest.FormAuthSelfTest, but
* the code is written, it's easier to run in eclipse, and no doubt tests
* somewhat different stuff. */
public class FormAuthTest extends TestCase {
private static Logger logger = Logger.getLogger(FormAuthTest.class.getName());
protected static final String DEFAULT_PAYLOAD_STRING = "abcdefghijklmnopqrstuvwxyz0123456789\n";
protected static final String FORM_AUTH_REALM = "form-auth-realm";
protected static final String FORM_AUTH_ROLE = "form-auth-role";
protected static final String FORM_AUTH_LOGIN = "form-auth-login";
protected static final String FORM_AUTH_PASSWORD = "form-auth-password";
protected FetchHTTP fetchHttp;
protected FetchHTTP getFetcher() throws IOException {
if (fetchHttp == null) {
fetchHttp = new FetchHTTP();
fetchHttp.setCookieStore(new SimpleCookieStore());
fetchHttp.setServerCache(new DefaultServerCache());
CrawlMetadata uap = new CrawlMetadata();
uap.setUserAgentTemplate(getClass().getName());
fetchHttp.setUserAgentProvider(uap);
fetchHttp.start();
}
return fetchHttp;
}
protected Recorder getRecorder() throws IOException {
if (Recorder.getHttpRecorder() == null) {
Recorder httpRecorder = new Recorder(TmpDirTestCase.tmpDir(),
getClass().getName(), 16 * 1024, 512 * 1024);
Recorder.setHttpRecorder(httpRecorder);
}
return Recorder.getHttpRecorder();
}
protected CrawlURI makeCrawlURI(String uri) throws URIException,
IOException {
UURI uuri = UURIFactory.getInstance(uri);
CrawlURI curi = new CrawlURI(uuri);
curi.setSeed(true);
curi.setRecorder(getRecorder());
return curi;
}
// convenience methods to get strings from raw recorded i/o
/**
* Raw response including headers.
*/
protected String rawResponseString(CrawlURI curi) throws IOException, UnsupportedEncodingException {
byte[] buf = IOUtils.toByteArray(curi.getRecorder().getReplayInputStream());
return new String(buf, "US-ASCII");
}
/**
* Raw message body, before any unchunking or content-decoding.
*/
protected String messageBodyString(CrawlURI curi) throws IOException, UnsupportedEncodingException {
byte[] buf = IOUtils.toByteArray(curi.getRecorder().getMessageBodyReplayInputStream());
return new String(buf, "US-ASCII");
}
/**
* Message body after unchunking but before content-decoding.
*/
protected String entityString(CrawlURI curi) throws IOException, UnsupportedEncodingException {
byte[] buf = IOUtils.toByteArray(curi.getRecorder().getEntityReplayInputStream());
return new String(buf, "US-ASCII");
}
/**
* Unchunked, content-decoded message body.
*/
protected String contentString(CrawlURI curi) throws IOException, UnsupportedEncodingException {
byte[] buf = IOUtils.toByteArray(curi.getRecorder().getContentReplayInputStream());
return new String(buf, "US-ASCII");
}
protected String httpRequestString(CrawlURI curi) throws IOException, UnsupportedEncodingException {
byte[] buf = IOUtils.toByteArray(curi.getRecorder().getRecordedOutput().getReplayInputStream());
return new String(buf, "US-ASCII");
}
public void testFormAuth() throws Exception {
startHttpServers();
HtmlFormCredential cred = new HtmlFormCredential();
cred.setDomain("localhost:7779");
cred.setLoginUri("/j_security_check");
HashMap<String, String> formItems = new HashMap<String,String>();
formItems.put("j_username", FORM_AUTH_LOGIN);
formItems.put("j_password", FORM_AUTH_PASSWORD);
cred.setFormItems(formItems);
getFetcher().getCredentialStore().getCredentials().put("form-auth-credential",
cred);
CrawlURI curi = makeCrawlURI("http://localhost:7779/");
getFetcher().process(curi);
logger.info('\n' + httpRequestString(curi) + contentString(curi));
runDefaultChecks(curi, "hostHeader");
// jetty needs us to hit a restricted url so it can redirect to the
// login page and remember where to redirect back to after successful
// login (if not we get a NPE within jetty)
curi = makeCrawlURI("http://localhost:7779/auth/1");
getFetcher().process(curi);
logger.info('\n' + httpRequestString(curi) + "\n\n" + rawResponseString(curi));
assertEquals(302, curi.getFetchStatus());
assertTrue(curi.getHttpResponseHeader("Location").startsWith("http://localhost:7779/login.html"));
PreconditionEnforcer preconditionEnforcer = new PreconditionEnforcer();
preconditionEnforcer.setServerCache(getFetcher().getServerCache());
preconditionEnforcer.setCredentialStore(getFetcher().getCredentialStore());
boolean result = preconditionEnforcer.credentialPrecondition(curi);
assertTrue(result);
CrawlURI loginUri = curi.getPrerequisiteUri();
assertEquals("http://localhost:7779/j_security_check", loginUri.toString());
// there's some special logic with side effects in here for the login uri itself
result = preconditionEnforcer.credentialPrecondition(loginUri);
assertFalse(result);
loginUri.setRecorder(getRecorder());
getFetcher().process(loginUri);
logger.info('\n' + httpRequestString(loginUri) + "\n\n" + rawResponseString(loginUri));
assertEquals(302, loginUri.getFetchStatus()); // 302 on successful login
assertEquals("http://localhost:7779/auth/1", loginUri.getHttpResponseHeader("location"));
curi = makeCrawlURI("http://localhost:7779/auth/1");
getFetcher().process(curi);
logger.info('\n' + httpRequestString(curi) + contentString(curi));
runDefaultChecks(curi, "hostHeader", "requestLine");
}
protected static final String LOGIN_HTML =
"<html>"
+ "<head><title>Log In</title></head>"
+ "<body>"
+ "<form action='/j_security_check' method='post'>"
+ "<div> username: <input name='j_username' type='text'/> </div>"
+ "<div> password: <input name='j_password' type='password'/> </div>"
+ "<div> <input type='submit' /> </div>" + "</form>" + "</body>"
+ "</html>";
protected static class FormAuthTestHandler extends SessionHandler {
public FormAuthTestHandler() {
super();
}
@Override
public void handle(String target, HttpServletRequest request,
HttpServletResponse response, int dispatch) throws IOException,
ServletException {
if (target.endsWith("/set-cookie")) {
response.addCookie(new javax.servlet.http.Cookie("test-cookie-name", "test-cookie-value"));
}
if (target.equals("/login.html")) {
response.setContentType("text/html;charset=US-ASCII");
response.setStatus(HttpServletResponse.SC_OK);
response.getOutputStream().write(LOGIN_HTML.getBytes("US-ASCII"));
((Request)request).setHandled(true);
} else {
response.setContentType("text/plain;charset=US-ASCII");
response.setDateHeader("Last-Modified", 0);
response.setStatus(HttpServletResponse.SC_OK);
response.getOutputStream().write(DEFAULT_PAYLOAD_STRING.getBytes("US-ASCII"));
((Request)request).setHandled(true);
}
}
}
protected static SecurityHandler makeAuthWrapper(Authenticator authenticator,
final String role, String realm, final String login,
final String password) {
Constraint constraint = new Constraint();
constraint.setRoles(new String[] { role });
constraint.setAuthenticate(true);
ConstraintMapping constraintMapping = new ConstraintMapping();
constraintMapping.setConstraint(constraint);
constraintMapping.setPathSpec("/auth/*");
SecurityHandler authWrapper = new SecurityHandler();
authWrapper.setAuthenticator(authenticator);
authWrapper.setConstraintMappings(new ConstraintMapping[] {constraintMapping});
authWrapper.setUserRealm(new HashUserRealm(realm) {
{
put(login, password);
addUserToRole(login, role);
}
});
return authWrapper;
}
protected void startHttpServers() throws Exception {
// server for form auth
Server server = new Server();
SocketConnector sc = new SocketConnector();
sc.setHost("127.0.0.1");
sc.setPort(7779);
server.addConnector(sc);
HandlerCollection handlers = new HandlerCollection();
handlers.addHandler(new FormAuthTestHandler());
RequestLogHandler requestLogHandler = new RequestLogHandler();
NCSARequestLog requestLog = new NCSARequestLog();
requestLogHandler.setRequestLog(requestLog);
handlers.addHandler(requestLogHandler);
FormAuthenticator formAuthenticatrix = new FormAuthenticator();
formAuthenticatrix.setLoginPage("/login.html");
SecurityHandler authWrapper = makeAuthWrapper(formAuthenticatrix,
FORM_AUTH_ROLE, FORM_AUTH_REALM, FORM_AUTH_LOGIN,
FORM_AUTH_PASSWORD);
authWrapper.setHandler(handlers);
SessionHandler sessionHandler = new SessionHandler();
sessionHandler.setSessionManager(new HashSessionManager());
sessionHandler.setHandler(authWrapper);
server.setHandler(sessionHandler);
server.start();
}
protected void runDefaultChecks(CrawlURI curi, String... exclusionsArray)
throws IOException, UnsupportedEncodingException {
Set<String> exclusions = new HashSet<String>(Arrays.asList(exclusionsArray));
String requestString = httpRequestString(curi);
if (!exclusions.contains("requestLine")) {
assertTrue(requestString.startsWith("GET / HTTP/1.0\r\n"));
}
assertTrue(requestString.contains("User-Agent: " + getClass().getName() + "\r\n"));
assertTrue(requestString.matches("(?s).*Connection: [Cc]lose\r\n.*"));
if (!exclusions.contains("acceptHeaders")) {
assertTrue(requestString.contains("Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"));
}
if (!exclusions.contains("hostHeader")) {
assertTrue(requestString.contains("Host: localhost:7777\r\n"));
}
assertTrue(requestString.endsWith("\r\n\r\n"));
// check sizes
assertEquals(DEFAULT_PAYLOAD_STRING.length(), curi.getContentLength());
assertEquals(curi.getContentSize(), curi.getRecordedSize());
// check various
assertEquals("sha1:TQ5R6YVOZLTQENRIIENVGXHOPX3YCRNJ", curi.getContentDigestSchemeString());
assertEquals("text/plain;charset=US-ASCII", curi.getContentType());
assertEquals(Charset.forName("US-ASCII"), curi.getRecorder().getCharset());
assertTrue(curi.getCredentials().isEmpty());
assertTrue(curi.getFetchDuration() >= 0);
assertTrue(curi.getFetchStatus() == 200);
assertTrue(curi.getFetchType() == FetchType.HTTP_GET);
// check message body, i.e. "raw, possibly chunked-transfer-encoded message contents not including the leading headers"
assertEquals(DEFAULT_PAYLOAD_STRING, messageBodyString(curi));
// check entity, i.e. "message-body after any (usually-unnecessary) transfer-decoding but before any content-encoding (eg gzip) decoding"
assertEquals(DEFAULT_PAYLOAD_STRING, entityString(curi));
// check content, i.e. message-body after possibly tranfer-decoding and after content-encoding (eg gzip) decoding
assertEquals(DEFAULT_PAYLOAD_STRING, contentString(curi));
assertEquals(DEFAULT_PAYLOAD_STRING.substring(0, 10), curi.getRecorder().getContentReplayPrefixString(10));
assertEquals(DEFAULT_PAYLOAD_STRING, curi.getRecorder().getContentReplayCharSequence().toString());
assertTrue(curi.getNonFatalFailures().isEmpty());
}
}