Package org.archive.modules.fetcher

Source Code of org.archive.modules.fetcher.CookieFetchHTTPIntegrationTest$CookieFetchHTTPIntegrationTests

/*
*  This file is part of the Heritrix web crawler (crawler.archive.org).
*
*  Licensed to the Internet Archive (IA) by one or more individual
*  contributors.
*
*  The IA licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/
package org.archive.modules.fetcher;

import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Set;

import javax.servlet.ServletException;
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import junit.extensions.TestSetup;
import junit.framework.Test;
import junit.framework.TestSuite;

import org.apache.commons.collections.Closure;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.io.FileUtils;
import org.archive.bdb.BdbModule;
import org.archive.modules.CrawlMetadata;
import org.archive.modules.CrawlURI;
import org.archive.modules.ProcessorTestBase;
import org.archive.modules.net.CrawlHost;
import org.archive.modules.net.CrawlServer;
import org.archive.modules.net.ServerCache;
import org.archive.spring.ConfigFile;
import org.archive.spring.ConfigPath;
import org.archive.util.TmpDirTestCase;
import org.mortbay.jetty.Request;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.bio.SocketConnector;
import org.mortbay.jetty.security.SslSocketConnector;
import org.mortbay.jetty.servlet.SessionHandler;
import org.mortbay.log.Log;

import sun.security.tools.KeyTool;

import com.google.common.io.Files;

@SuppressWarnings("restriction")
public class CookieFetchHTTPIntegrationTest extends ProcessorTestBase {

    protected static class TestHandler extends SessionHandler {
        public TestHandler() {
            super();
        }

        @Override
        public void handle(String target, HttpServletRequest request,
                HttpServletResponse response, int dispatch) throws IOException,
                ServletException {
            if (request.getParameter("name") != null) {
                Cookie cookie = new javax.servlet.http.Cookie(request.getParameter("name"),
                        request.getParameter("value"));
                if (request.getParameter("domain") != null) {
                    cookie.setDomain(request.getParameter("domain"));
                }
                if (request.getParameter("path") != null) {
                    cookie.setDomain(request.getParameter("path"));
                }
                if (request.getParameter("maxAge") != null) {
                    cookie.setMaxAge(Integer.valueOf(request.getParameter("maxAge")));
                }
                if (request.getParameter("secure") != null) {
                    cookie.setSecure(request.getParameter("secure").equals(1));
                }
                if (request.getParameter("comment") != null) {
                    cookie.setComment(request.getParameter("comment"));
                }
                if (request.getParameter("version") != null) {
                    cookie.setVersion(Integer.valueOf(request.getParameter("version")));
                }
                response.addCookie(cookie);
            }

            response.setContentType("text/plain;charset=US-ASCII");
            response.setStatus(200);

            if (request.getCookies() != null) {
                response.getOutputStream().println(request.getCookies().length + " cookies received");
                for (int i = 0; i < request.getCookies().length; i++) {
                    response.getOutputStream().println(i + ". " + request.getCookies()[0]);
                }
            } else {
                response.getOutputStream().println("0 cookies received");
            }

            ((Request)request).setHandled(true);
        }
    }

    public static Server startHttpServer() throws Exception {
        Log.getLogger(Server.class.getCanonicalName()).setDebugEnabled(true);

        Server server = new Server();

        server.setHandler(new TestHandler());

        SocketConnector sc = new SocketConnector();
        sc.setHost("127.0.0.1");
        sc.setPort(7777);

        File keystoreFile = new File(TmpDirTestCase.tmpDir(), "keystore");
        if (keystoreFile.exists()) {
            keystoreFile.delete();
        }
        final String KEYSTORE_PASSWORD = "keystore-password";
        KeyTool.main(new String[] {
                "-keystore", keystoreFile.getPath(),
                "-storepass", KEYSTORE_PASSWORD,
                "-keypass", KEYSTORE_PASSWORD,
                "-alias", "jetty",
                "-genkey",
                "-keyalg", "RSA",
                "-dname", "CN=127.0.0.1",
                "-validity","3650"}); // 10 yr validity

        SslSocketConnector ssc = new SslSocketConnector();
        ssc.setHost("127.0.0.1");
        ssc.setPort(7443);
        ssc.setKeyPassword(KEYSTORE_PASSWORD);
        ssc.setKeystore(keystoreFile.getPath());

        server.addConnector(sc);
        server.addConnector(ssc);
        server.start();

        return server;
    }

    public static Test suite() {
        return new TestSetup(new TestSuite(CookieFetchHTTPIntegrationTests.class)) {
            private Server server;
            @Override
            protected void setUp() throws Exception {
                super.setUp();
                server = startHttpServer();
            }
            @Override
            protected void tearDown() throws Exception {
                super.tearDown();
                server.stop();
                server.destroy();
            }
        };
    }

    public static class AlwaysLocalhostServerCache extends ServerCache {
        protected static final InetAddress LOCALHOST;
        static {
            try {
                LOCALHOST = InetAddress.getByName("127.0.0.1");
            } catch (UnknownHostException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public CrawlHost getHostFor(String host) {
            CrawlHost h = new CrawlHost(host);
            h.setIP(LOCALHOST, -1);
            return h;
        }
        @Override
        public CrawlServer getServerFor(String serverKey) {
            CrawlServer s = new CrawlServer(serverKey);
            return s;
        }
        @Override public void forAllHostsDo(Closure action) { throw new RuntimeException("not implemented"); }
        @Override public Set<String> hostKeys() { throw new RuntimeException("not implemented"); }
    }

    public static class CookieFetchHTTPIntegrationTests extends ProcessorTestBase {

        @Override
        protected Class<?> getModuleClass() {
            return BdbCookieStore.class;
        }

        protected FetchHTTP fetcher;
        protected BdbModule bdb;
        protected BdbCookieStore bdbCookieStore;

        protected File tmpdir = Files.createTempDir();

        protected BdbModule bdb() throws IOException {
            if (bdb == null) {
                ConfigPath basePath = new ConfigPath("testBase",
                        tmpdir.getAbsolutePath());
                ConfigPath bdbDir = new ConfigPath("bdb", "bdb");
                bdbDir.setBase(basePath);
                FileUtils.deleteDirectory(bdbDir.getFile());

                bdb = new BdbModule();
                bdb.setDir(bdbDir);
                bdb.start();
            }
            return bdb;
        }

        protected AbstractCookieStore bdbCookieStore() throws IOException {
            if (bdbCookieStore == null) {
                bdbCookieStore = new BdbCookieStore();
                ConfigPath basePath = new ConfigPath("testBase",
                        tmpdir.getAbsolutePath());
                ConfigFile cookiesSaveFile = new ConfigFile("cookiesSaveFile", "cookies.txt");
                cookiesSaveFile.setBase(basePath);
                bdbCookieStore.setCookiesSaveFile(cookiesSaveFile);
                bdbCookieStore.setBdbModule(bdb());
                bdbCookieStore.start();
            }
            return bdbCookieStore;
        }

        protected SimpleCookieStore simpleCookieStore;
        protected SimpleCookieStore simpleCookieStore() {
            if (simpleCookieStore == null) {
                simpleCookieStore = new SimpleCookieStore();
                simpleCookieStore.start();
            }
            return simpleCookieStore;
        }

        protected FetchHTTP fetcher() {
            if (fetcher == null) {
                fetcher = new FetchHTTP();
                // f.setCookieStore(cookieStore);
                fetcher.setServerCache(new AlwaysLocalhostServerCache());
                CrawlMetadata uap = new CrawlMetadata();
                uap.setUserAgentTemplate(getClass().getName());
                fetcher.setUserAgentProvider(uap);
                fetcher.start();
            }
            return fetcher;
        }

        public void testNoCookie() throws IOException, InterruptedException {
            testNoCookie(simpleCookieStore());
            testNoCookie(bdbCookieStore());
        }

        protected void testNoCookie(AbstractCookieStore cookieStore) throws URIException, IOException, InterruptedException {
            cookieStore.clear();
            fetcher().setCookieStore(cookieStore);

            CrawlURI curi = makeCrawlURI("http://example.com:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            // check second fetch has no cookies
            curi = makeCrawlURI("http://example.com:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            assertEquals(0, cookieStore.getCookies().size());
        }

        // name value domain path maxAge secure comment version

        protected void testBasics(AbstractCookieStore cookieStore) throws URIException, IOException, InterruptedException {
            cookieStore.clear();
            fetcher().setCookieStore(cookieStore);

            CrawlURI curi = makeCrawlURI("http://example.com:7777/?name=foo&value=bar");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertTrue(FetchHTTPTests.rawResponseString(curi).contains("Set-Cookie: foo=bar\r\n"));

            // check second fetch has expected cookie
            curi = makeCrawlURI("http://example.com:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            assertEquals(1, cookieStore.getCookies().size());
        }

        public void testBasics() throws URIException, IOException, InterruptedException {
            testBasics(simpleCookieStore());
            testBasics(bdbCookieStore());
        }

        protected void testImplicitDomain(AbstractCookieStore cookieStore) throws URIException, IOException, InterruptedException {
            cookieStore.clear();
            fetcher().setCookieStore(cookieStore);

            CrawlURI curi = makeCrawlURI("http://example.com:7777/?name=foo&value=bar");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertTrue(FetchHTTPTests.rawResponseString(curi).contains("Set-Cookie: foo=bar\r\n"));

            // check second fetch has expected cookie
            curi = makeCrawlURI("http://example.com:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            // check fetch to different domain has no cookie
            curi = makeCrawlURI("http://example.ORG:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            /*
             * XXX I think browsers differ on this behavior. This is what
             * org.apache.http.impl.cookie.BrowserCompatSpec does.
             */
            curi = makeCrawlURI("http://SUBDOMAIN.example.com:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            assertEquals(1, cookieStore.getCookies().size());
        }

        public void testImplicitDomain() throws URIException, IOException, InterruptedException {
            testImplicitDomain(simpleCookieStore());
            testImplicitDomain(bdbCookieStore());
        }

        protected void testExplicitDomain(AbstractCookieStore cookieStore) throws URIException, IOException, InterruptedException {
            cookieStore.clear();
            fetcher().setCookieStore(cookieStore);

            CrawlURI curi = makeCrawlURI("http://example.com:7777/?name=foo&value=bar&domain=example.com");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertTrue(FetchHTTPTests.rawResponseString(curi).contains("Set-Cookie: foo=bar;Domain=example.com\r\n"));

            // check second fetch has expected cookie
            curi = makeCrawlURI("http://example.com:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            // check fetch to different domain has no cookie
            curi = makeCrawlURI("http://example.ORG:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            curi = makeCrawlURI("http://SUBDOMAIN.example.com:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            assertEquals(1, cookieStore.getCookies().size());
        }

        public void testExplicitDomain() throws URIException, IOException, InterruptedException {
            testExplicitDomain(simpleCookieStore());
            testExplicitDomain(bdbCookieStore());
        }

        protected void testExplicitDomainWithLeadingDot(AbstractCookieStore cookieStore) throws URIException, IOException, InterruptedException {
            cookieStore.clear();
            fetcher().setCookieStore(cookieStore);

            CrawlURI curi = makeCrawlURI("http://example.com:7777/?name=foo&value=bar&domain=.example.com");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertTrue(FetchHTTPTests.rawResponseString(curi).contains("Set-Cookie: foo=bar;Domain=.example.com\r\n"));

            // check second fetch has expected cookie
            curi = makeCrawlURI("http://example.com:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            // check fetch to different domain has no cookie
            curi = makeCrawlURI("http://example.ORG:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            curi = makeCrawlURI("http://SUBDOMAIN.example.com:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            assertEquals(1, cookieStore.getCookies().size());
        }

        public void testExplicitDomainWithLeadingDot() throws URIException, IOException, InterruptedException {
            testExplicitDomainWithLeadingDot(simpleCookieStore());
            testExplicitDomainWithLeadingDot(bdbCookieStore());
        }

        protected void testRejectDomain(AbstractCookieStore cookieStore) throws URIException, IOException, InterruptedException {
            cookieStore.clear();
            fetcher().setCookieStore(cookieStore);

            CrawlURI curi = makeCrawlURI("http://example.com:7777/?name=foo&value=bar&domain=somethingelse.com");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertTrue(FetchHTTPTests.rawResponseString(curi).contains("Set-Cookie: foo=bar;Domain=somethingelse.com\r\n"));

            // check fetch of original domain has no cookie
            curi = makeCrawlURI("http://example.com:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            // check fetch of cookie domain has no cookie
            curi = makeCrawlURI("http://somethingelse.com:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            assertEquals(0, cookieStore.getCookies().size());

            // reject wrong subdomain

            curi = makeCrawlURI("http://FOO.example.com:7777/?name=foo&value=bar&domain=BAR.example.com");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertTrue(FetchHTTPTests.rawResponseString(curi).contains("Set-Cookie: foo=bar;Domain=bar.example.com\r\n"));

            // check fetch of original domain has no cookie
            curi = makeCrawlURI("http://foo.example.com:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            // check fetch of cookie domain has no cookie
            curi = makeCrawlURI("http://bar.example.com:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            assertEquals(0, cookieStore.getCookies().size());
        }

        public void testRejectDomain() throws URIException, IOException, InterruptedException {
            testRejectDomain(simpleCookieStore());
            testRejectDomain(bdbCookieStore());
        }

        protected void testSubdomainParentDomain(AbstractCookieStore cookieStore) throws URIException, IOException, InterruptedException {
            cookieStore.clear();
            fetcher().setCookieStore(cookieStore);

            CrawlURI curi = makeCrawlURI("http://FOO.example.com:7777/?name=foo&value=bar&domain=example.com");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertTrue(FetchHTTPTests.rawResponseString(curi).contains("Set-Cookie: foo=bar;Domain=example.com\r\n"));

            curi = makeCrawlURI("http://FOO.example.com:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            curi = makeCrawlURI("http://BAR.example.com:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            curi = makeCrawlURI("http://example.com:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            curi = makeCrawlURI("http://somethingelse.com:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            assertEquals(1, cookieStore.getCookies().size());
        }

        public void testSubdomainParentDomain() throws URIException, IOException, InterruptedException {
            testSubdomainParentDomain(simpleCookieStore());
            testSubdomainParentDomain(bdbCookieStore());
        }

        protected void testIPAddress(AbstractCookieStore cookieStore) throws URIException, IOException, InterruptedException {
            cookieStore.clear();
            fetcher().setCookieStore(cookieStore);

            CrawlURI curi = makeCrawlURI("http://10.0.0.1:7777/?name=foo&value=bar");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertTrue(FetchHTTPTests.rawResponseString(curi).contains("Set-Cookie: foo=bar\r\n"));

            curi = makeCrawlURI("http://10.0.0.1:7777/");
            fetcher().process(curi);
            assertTrue(FetchHTTPTests.httpRequestString(curi).contains("Cookie: foo=bar\r\n"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            curi = makeCrawlURI("http://192.168.0.1:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            curi = makeCrawlURI("http://10.0.0.2:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            curi = makeCrawlURI("http://example.com:7777/");
            fetcher().process(curi);
            assertFalse(FetchHTTPTests.httpRequestString(curi).toLowerCase().contains("cookie:"));
            assertFalse(FetchHTTPTests.rawResponseString(curi).toLowerCase().contains("set-cookie:"));

            assertEquals(1, cookieStore.getCookies().size());
        }

        public void testIPAddress() throws URIException, IOException, InterruptedException {
            testIPAddress(simpleCookieStore());
            testIPAddress(bdbCookieStore());
        }
    }
}
TOP

Related Classes of org.archive.modules.fetcher.CookieFetchHTTPIntegrationTest$CookieFetchHTTPIntegrationTests

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.