Package it.unimi.dsi.law.warc.util

Examples of it.unimi.dsi.law.warc.util.WarcHttpResponse


      GZWarcRecord record = new GZWarcRecord();
      Filter<WarcRecord> filter = Filters.adaptFilterBURL2WarcRecord (new TrueFilter());
      WarcFilteredIterator it = new WarcFilteredIterator(in, record, filter);

      WarcHttpResponse response = new WarcHttpResponse();

      Graph mdGraph = new org.openrdf.model.impl.GraphImpl();
      String mdGraphURI = "http://challenge.semanticweb.org/2008/metadata";
      ValueFactory vf = mdGraph.getValueFactory();
      String dcNS = "http://purl.org/dc/elements/1.1/";
      DatatypeFactory dtf = null;

      try {
    dtf = DatatypeFactory.newInstance();
      } catch (DatatypeConfigurationException e1) {
    // TODO Auto-generated catch block
    e1.printStackTrace();
      }

      GregorianCalendar c = new GregorianCalendar ();

      try {
    int cnt = 0;

    //      while (cnt < 10 && it.hasNext()) {
    while (it.hasNext()) {

        WarcRecord nextRecord = it.next();

        //Get the HttpResponse
        try {
      response.fromWarcRecord (nextRecord);

      if (debugMode) {
          System.out.println("RECORD     : " + String.format("%05d", cnt));
          System.out.println(" subjectUri: " + nextRecord.header.subjectUri);
          System.out.println("contentType: " + nextRecord.header.contentType);
          System.out.println(" dataLength: " + nextRecord.header.dataLength);
          System.out.println("actual data: " + nextRecord.block.length());
          System.out.println("    missing: " +
            (nextRecord.header.dataLength - nextRecord.block.length()) + "b");
      }
      l.output(nextRecord.header.subjectUri.toString());

      URI s, p, o;
      Literal lit;
      if (cnt == max ){ return ; }
      if (cnt >= min && cnt < max) {

          s = vf.createURI(nextRecord.header.subjectUri.toString());
          p = vf.createURI(dcNS, "source");
          lit = vf.createLiteral(inFile);

          mdGraph.add(s,p,lit);

          c.setTime(nextRecord.header.creationDate);
          XMLGregorianCalendar xc = dtf.newXMLGregorianCalendar(c);

          p = vf.createURI(dcNS, "date");
          lit = vf.createLiteral(xc);

          mdGraph.add(s,p,lit);

          curFile = w.write(response.contentAsStream(), cnt, start);

          try {
        ldAddStmt.setString(1, curFile);
        ldAddStmt.setString(2, nextRecord.header.subjectUri.toString());
        ResultSet res = ldAddStmt.executeQuery();
View Full Code Here

TOP

Related Classes of it.unimi.dsi.law.warc.util.WarcHttpResponse

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.