Package edu.umd.cloud9.collection.clue

Examples of edu.umd.cloud9.collection.clue.ClueWarcForwardIndex.loadIndex()


      String outputFile = conf.get("OutputFile");
      String findexFile = conf.get("ForwardIndexFile");
      String docnoMapping = conf.get("DocnoMappingFile");

      ClueWarcForwardIndex findex = new ClueWarcForwardIndex();
      findex.loadIndex(new Path(findexFile), new Path(docnoMapping), FileSystem.get(conf));

      FileSystem fs = FileSystem.get(conf);

      sLogger.info("reading " + inputFile);
View Full Code Here


        "-index=" + index };

    IntegrationUtils.exec(Joiner.on(" ").join(args));

    ClueWarcForwardIndex findex = new ClueWarcForwardIndex();
    findex.loadIndex(new Path(index), new Path(mappingFile), fs);

    assertTrue(findex.getDocument(14069750).getContent()
        .contains("Vizergy: How Design and SEO work together"));
    assertTrue(findex.getDocument("clueweb09-en0008-76-19728").getContent()
        .contains("Jostens - Homeschool Yearbooks"));
View Full Code Here

      String outputFile = conf.get("OutputFile");
      String findexFile = conf.get("ForwardIndexFile");
      String docnoMapping = conf.get("DocnoMappingFile");

      ClueWarcForwardIndex findex = new ClueWarcForwardIndex();
      findex.loadIndex(new Path(findexFile), new Path(docnoMapping), FileSystem.get(conf));

      FileSystem fs = FileSystem.get(conf);

      sLogger.info("reading " + inputFile);
View Full Code Here

        "-docnoMapping=" + mappingFile };

    IntegrationUtils.exec(Joiner.on(" ").join(args));

    TrecForwardIndex findex = new TrecForwardIndex();
    findex.loadIndex(new Path(index), new Path(mappingFile), fs);

    assertTrue(findex.getDocument(1).getContent().contains("Newspapers in the Former Yugoslav Republic"));
    assertTrue(findex.getDocument("FBIS3-1").getContent().contains("Newspapers in the Former Yugoslav Republic"));
    assertEquals(1, findex.getFirstDocno());
    assertEquals(472525, findex.getLastDocno());
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.