Package ivory.integration.irj2012

Source Code of ivory.integration.irj2012.VerifyFeatureExtraction

package ivory.integration.irj2012;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import ivory.ffg.driver.DocumentVectorOnTheFlyIndexing;
import ivory.ffg.driver.RankAndFeaturesSmallAdaptive;
import ivory.ffg.preprocessing.GenerateCompressedPositionalPostings;
import ivory.ffg.preprocessing.GenerateDocumentVectors;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;

import junit.framework.JUnit4TestAdapter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.junit.Test;

public class VerifyFeatureExtraction {
  private static final Logger LOG = Logger.getLogger(VerifyFeatureExtraction.class);

  private static final String[] FEATURES = {
    "1  622182  0.0 8.5106735 0.0 -21.956047 -20.940353 -21.956047 ",
    "1  939420  0.0 8.111909 0.0 -22.17752 -21.539658 -22.17752 ",
    "1  1309504  0.0 10.92844 0.0 -23.21772 -18.169184 -23.21772 ",
    "1  1309565  0.0 8.300849 0.0 -23.40636 -21.933271 -23.40636 ",
    "1  2107067  4.5951195 10.087684 4.5951195 -17.570501 -18.763742 -17.570501 ",
    "1  3213263  0.0 8.32819 0.0 -22.91655 -21.601917 -22.91655 ",
    "1  3991819  0.0 10.222314 0.0 -21.487642 -18.61485 -21.487642 ",
    "1  4677357  0.0 8.744904 0.0 -21.57198 -20.75782 -21.57198 ",
    "1  5778775  0.0 7.911496 0.0 -22.035013 -21.80349 -22.035013 ",
    "1  6043395  0.0 7.835582 0.0 -22.71848 -21.526793 -22.71848 ",
    "2  24249210  15.735939 22.211124 15.692177 -16.604282 -20.48779 -16.78569 ",
    "2  24249213  15.214951 20.727518 15.214951 -17.903378 -23.020555 -17.903378 ",
    "2  24249232  14.704382 22.109085 10.109262 -18.818066 -21.37287 -22.436802 ",
    "2  24249235  14.596262 21.126617 10.0011425 -18.759815 -23.215107 -22.378551 ",
    "2  24249237  10.140953 20.56073 10.140953 -22.563118 -23.267324 -22.563118 ",
    "2  24249238  10.619831 20.861208 10.619831 -21.24904 -22.737915 -21.24904 ",
    "2  24249239  10.211376 20.72454 10.211376 -21.987541 -22.518667 -21.987541 ",
    "2  24249240  10.109262 20.376194 10.109262 -22.579971 -23.695538 -22.579971 ",
    "2  24249243  14.596262 21.139387 15.106831 -19.567122 -23.415813 -18.887474 ",
    "2  24249262  15.570984 20.983067 15.570984 -17.326145 -22.088848 -17.326145 ",
    "3  245  0.0 1.8352408 0.0 0.0 -6.8256583 0.0 ",
    "3  272  0.0 1.8170375 0.0 0.0 -6.861235 0.0 ",
    "3  278  0.0 1.9549259 0.0 0.0 -6.575493 0.0 ",
    "3  1411  0.0 1.7040439 0.0 0.0 -7.071218 0.0 ",
    "3  1869  0.0 2.3621986 0.0 0.0 -5.9624248 0.0 ",
    "3  2448  0.0 2.0067103 0.0 0.0 -6.791424 0.0 ",
    "3  2470  0.0 1.6766067 0.0 0.0 -7.1198583 0.0 ",
    "3  2764  0.0 1.8759606 0.0 0.0 -6.7439375 0.0 ",
    "3  2988  0.0 2.1518514 0.0 0.0 -6.455552 0.0 ",
    "3  3025  0.0 1.9815545 0.0 0.0 -6.5151377 0.0 ",
    "4  97374  0.0 6.3133283 0.0 0.0 -6.3661027 0.0 ",
    "4  168240  0.0 5.2482157 0.0 0.0 -7.131438 0.0 ",
    "4  195995  0.0 4.8623586 0.0 0.0 -7.409898 0.0 ",
    "4  196007  0.0 5.2050014 0.0 0.0 -7.1644793 0.0 ",
    "4  196014  0.0 4.8719454 0.0 0.0 -7.4033713 0.0 ",
    "4  442831  0.0 4.7180376 0.0 0.0 -7.5062284 0.0 ",
    "4  502657  0.0 5.027849 0.0 0.0 -7.294698 0.0 ",
    "4  502749  0.0 5.010241 0.0 0.0 -7.3072276 0.0 ",
    "4  503251  0.0 5.15173 0.0 0.0 -7.204485 0.0 ",
    "4  517535  0.0 4.817546 0.0 0.0 -7.440185 0.0 ",
    "5  46285  0.0 5.6725864 0.0 -12.171908 -15.081589 -12.171908 ",
    "5  71417  0.0 7.576192 0.0 -10.463681 -13.751675 -10.463681 ",
    "5  71449  0.0 7.529799 0.0 -11.411591 -14.297478 -11.411591 ",
    "5  71461  0.0 9.100533 0.0 -10.601865 -12.391013 -10.601865 ",
    "5  71463  0.0 7.5188546 0.0 -10.5033 -13.814863 -10.5033 ",
    "5  71465  0.0 7.4566336 0.0 -10.545237 -13.882295 -10.545237 ",
    "5  71481  0.0 7.591035 0.0 -10.453264 -13.735146 -10.453264 ",
    "5  71497  0.0 8.521038 0.0 -10.388376 -12.953309 -10.388376 ",
    "5  71518  0.0 7.430893 0.0 -10.562283 -13.90986 -10.562283 ",
    "5  81259  0.0 5.893096 0.0 -11.648296 -15.3101015 -11.648296 "
  };

  private static final String[] VECTOR_TYPES = {
    "ivory.ffg.data.DocumentVectorHashedArray",
    "ivory.ffg.data.DocumentVectorVIntArray",
    "ivory.ffg.data.DocumentVectorPForDeltaArray",
    "ivory.ffg.data.DocumentVectorMiniInvertedIndex"
  };

  private static final String IVORY_INDEX_PATH = "/scratch0/indexes/adhoc/clue.en.01/";
  private static final String SPAM_PATH = "/scratch0/indexes/adhoc/CIKM2012/docscores-spam.dat.en.01";
  private static final String QUERY_PATH = "data/ivory/ffg/queries.xml";
  private static final String FEATURES_PATH = "data/ivory/ffg/features.xml";
  private static final String DOCUMENTS_PATH = "data/ivory/ffg/documents.txt";

  @Test public void runRegression() throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    File vectors = File.createTempFile("ffgDocumentVector", null);
    File features = File.createTempFile("ffgFeatures", null);

    // Verify Monolithic (Small Adaptive to retrieve and compute features)
    String[] paramsCompressedPostings = new String[] {
      "-index", VerifyFeatureExtraction.IVORY_INDEX_PATH,
      "-spam", VerifyFeatureExtraction.SPAM_PATH,
      "-query", VerifyFeatureExtraction.QUERY_PATH,
      "-output", vectors.getPath()
    };

    String[] paramsMonolithic = new String[] {
      "-index", VerifyFeatureExtraction.IVORY_INDEX_PATH,
      "-posting", vectors.getPath(),
      "-query", VerifyFeatureExtraction.QUERY_PATH,
      "-candidate", VerifyFeatureExtraction.DOCUMENTS_PATH,
      "-feature", VerifyFeatureExtraction.FEATURES_PATH,
      "-hits", "10",
      "-spam", VerifyFeatureExtraction.SPAM_PATH,
      "-output", features.getPath()
    };

    GenerateCompressedPositionalPostings postingsGenerator =
      new GenerateCompressedPositionalPostings();
    long start = System.currentTimeMillis();
    postingsGenerator.main(paramsCompressedPostings);
    long end = System.currentTimeMillis();
    LOG.info("Total postings generation time: " + (end - start) + "ms");

    RankAndFeaturesSmallAdaptive.main(paramsMonolithic);

    FSDataInputStream input = fs.open(new Path(features.getPath()));
    String line;
    int i = 0;
    while((line = input.readLine()) != null) {
      assertTrue(FEATURES[i++].trim().equals(line.trim()));
    }
    input.close();
    assertEquals(FEATURES.length, i);
    LOG.info("Monolithic output checked.");

    // Verify Document Vectors
    for(String vectorType: VECTOR_TYPES) {
      String[] paramsDocumentVector = new String[] {
        "-index", VerifyFeatureExtraction.IVORY_INDEX_PATH,
        "-dvclass", vectorType,
        "-candidate", VerifyFeatureExtraction.DOCUMENTS_PATH,
        "-output", vectors.getPath()
      };

      String[] paramsDecoupled = new String[] {
        "-index", VerifyFeatureExtraction.IVORY_INDEX_PATH,
        "-dvclass", vectorType,
        "-document", vectors.getPath(),
        "-query", VerifyFeatureExtraction.QUERY_PATH,
        "-candidate", VerifyFeatureExtraction.DOCUMENTS_PATH,
        "-feature", VerifyFeatureExtraction.FEATURES_PATH,
        "-output", features.getPath()
      };

      GenerateDocumentVectors dvGenerator =
        new GenerateDocumentVectors();
      start = System.currentTimeMillis();
      dvGenerator.main(paramsDocumentVector);
      end = System.currentTimeMillis();
      LOG.info("Total document vector generation time: " + (end - start) + "ms");

      DocumentVectorOnTheFlyIndexing.main(paramsDecoupled);

      input = fs.open(new Path(features.getPath()));
      i = 0;
      while((line = input.readLine()) != null) {
        assertTrue(FEATURES[i++].trim().equals(line.trim()));
      }
      input.close();
      assertEquals(FEATURES.length, i);
      LOG.info(vectorType + " output checked.");
    }

    fs.delete(new Path(vectors.getPath()), true);
    fs.delete(new Path(features.getPath()), true);
  }

  public static junit.framework.Test suite() {
    return new JUnit4TestAdapter(VerifyFeatureExtraction.class);
  }
}
TOP

Related Classes of ivory.integration.irj2012.VerifyFeatureExtraction

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.