Package com.marklogic.tarantula.test

Source Code of com.marklogic.tarantula.test.HTTPCrawlerTest

package com.marklogic.tarantula.test;

import com.marklogic.ps.test.XQueryTestCase;
import com.marklogic.xcc.ValueFactory;
import com.marklogic.xcc.types.*;
import com.marklogic.xcc.ResultSequence;

import java.io.*;

public class HTTPCrawlerTest extends XQueryTestCase {

  private String modulePath = "/util/tarantula.xqy";
  private String modulePath2 = "util/link-queue.xqy";
 
  private String sampleURL1 = "http://en.wikipedia.org/wiki/Star_wars";
  private String sampleURL2 = "http://upload.wikimedia.org/wikipedia/commons/6/6c/Star_Wars_Logo.svg";
  private String sampleURL3 = "http://en.wikipedia.org/wiki/User:Tkgd2007";
 
  protected void setUp() throws Exception {
    super.setUp();
    executeLibraryModule("/application/models/crawl-model.xqy",
        "http://www.marklogic.com/tarantula/crawl",
        "init", null);
  }
 
  public void testHTTPGetHTMLContent() throws Exception {
    //Initialize the variable
    XdmVariable[] variables = new XdmVariable[] {
        ValueFactory.newVariable(new XName("url"), ValueFactory.newXSString(sampleURL1))};
    ResultSequence rs = this.executeMainModule(modulePath, null, variables);
    String q = "fn:doc('" + sampleURL1 + "')//*:title/text()";
    ResultSequence rs2 = executeQuery(q, null, null);
    String qStr = rs2.asString();
    System.out.println(rs.asString());
    System.out.println(qStr);
    assertEquals("Star Wars - Wikipedia, the free encyclopedia", qStr);   
  }
 
  public void testGetLinkQueue() throws Exception {
    //Initialize the variable
    XdmVariable[] variables = new XdmVariable[] {
        ValueFactory.newVariable(new XName("url"), ValueFactory.newXSString(sampleURL1))};
    ResultSequence rs = this.executeMainModule(modulePath2, null, variables);
    String q = "fn:count(fn:doc('" + sampleURL1 + "')//*:link)";
    ResultSequence rs2 = executeQuery(q, null, null);
    String qStr = rs2.asString();
    try {
        // Create file
        FileWriter fstream = new FileWriter("link-queue.txt");
        BufferedWriter out = new BufferedWriter(fstream);
        out.write(rs.asString());
        //Close the output stream
        out.close();
    }
    catch (Exception e) { //Catch exception if any
          System.err.println("Error: " + e.getMessage());
   
    }
    System.out.println(rs.asString());
    //System.out.println(qStr);
    assertTrue(Integer.parseInt(qStr) > 0)
  }
 
   
}
TOP

Related Classes of com.marklogic.tarantula.test.HTTPCrawlerTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.