Package edu.umd.cloud9.collection.aquaint2

Source Code of edu.umd.cloud9.collection.aquaint2.Aquaint2ForwardIndex

package edu.umd.cloud9.collection.aquaint2;

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import edu.umd.cloud9.collection.DocumentForwardIndex;

/**
* Object representing a document forward index for AQUAINT2 collections.
*
* @author Jimmy Lin
*/
public class Aquaint2ForwardIndex implements DocumentForwardIndex<Aquaint2Document> {
  private static final Logger LOG = Logger.getLogger(Aquaint2ForwardIndex.class);

  private long[] offsets;
  private int[] lengths;
  private FSDataInputStream input;
  private Aquaint2DocnoMapping docnoMapping = new Aquaint2DocnoMapping();
  private String collectionPath;

  @Override
  public int getDocno(String docid) {
    return docnoMapping.getDocno(docid);
  }

  @Override
  public String getDocid(int docno) {
    return docnoMapping.getDocid(docno);
  }

  @Override
  public int getLastDocno() {
    return offsets.length-1;
  }

  @Override
  public int getFirstDocno() {
    return 1;
  }

  @Override
  public String getCollectionPath() {
    return collectionPath;
  }

  @Override
  public Aquaint2Document getDocument(String docid) {
    return getDocument(docnoMapping.getDocno(docid));
  }

  @Override
  public Aquaint2Document getDocument(int docno) {
    Aquaint2Document doc = new Aquaint2Document();

    try {
      LOG.debug("docno " + docno + ": byte offset " + offsets[docno] + ", length "
          + lengths[docno]);

      input.seek(offsets[docno]);

      byte[] arr = new byte[lengths[docno]];

      input.read(arr);

      Aquaint2Document.readDocument(doc, new String(arr));
    } catch (IOException e) {
      e.printStackTrace();
    }

    return doc;
  }

  @Override
  public void loadIndex(Path index, Path mapping, FileSystem fs) throws IOException {
    FSDataInputStream in = fs.open(index);

    // Read and throw away.
    in.readUTF();
    collectionPath = in.readUTF();

    // Docnos start at one, so we need an array that's one larger than number of docs.
    int sz = in.readInt() + 1;
    offsets = new long[sz];
    lengths = new int[sz];

    for (int i = 1; i < sz; i++) {
      offsets[i] = in.readLong();
      lengths[i] = in.readInt();
    }
    in.close();

    input = fs.open(new Path(collectionPath));
    docnoMapping.loadMapping(mapping, fs);
  }
}
TOP

Related Classes of edu.umd.cloud9.collection.aquaint2.Aquaint2ForwardIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.