Package com.slytechs.capture.file.indexer

Source Code of com.slytechs.capture.file.indexer.SoftRegionIndexer

/**
* Copyright (C) 2007 Sly Technologies, Inc. This library is free software; you
* can redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version. This
* library is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details. You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package com.slytechs.capture.file.indexer;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jnetstream.capture.file.HeaderReader;

import com.slytechs.capture.file.editor.BasicRecordIterator;
import com.slytechs.capture.file.editor.PartialLoader;
import com.slytechs.utils.Size;
import com.slytechs.utils.event.ProgressTask;
import com.slytechs.utils.event.SuperProgressTask;
import com.slytechs.utils.io.IORuntimeException;
import com.slytechs.utils.region.FlexRegion;

/**
* @author Mark Bednarczyk
* @author Sly Technologies, Inc.
*/
public class SoftRegionIndexer implements RegionIndexer {
  private static final Log logger = LogFactory.getLog(SoftRegionIndexer.class);

  private static final int LARGE_FILE_SAMPLE = 3000;

  private static final long LARGE_FILE_THRESHOLD = 10 * 1024 * 1024;

  private static final long MAX_HARD_RECORDS = 10000;

  private static final int MIN_FACTOR_SIZE = 500;

  private static final int SMALL_FILE_SAMPE = 300;

  private int factor = 0;

  private int length = 0;

  private final HeaderReader lengthGetter;

  private SuperProgressTask task;

  /*
   * TODO add implementation of partial region indexer
   */
  @SuppressWarnings("unused")
  private final PartialLoader loader;

  private IndexTable[] table;

  private ProgressTask scanTask;

  /**
   * @param data
   * @param name
   * @throws IOException
   */
  public SoftRegionIndexer(final FlexRegion<RegionIndexer> region,
      final long length, final PartialLoader sData) throws IOException {

    this(sData);
    /*
     * Just in case we have use soft indexes which will require rescanning using
     * the partial loader
     */
    // this.loader = sData;
    // this.lengthGetter = sData.getLengthGetter();
    //
    // RegionSegment<RegionIndexer> segment = region.getSegment(0);
    // RegionSegment linked = (RegionSegment) segment.getLinkedSegment();
    // RegionIndexer indexer = segment.getData();
    //
    // for (int i = 0; i < length; i++) {
    //
    // if (segment.checkBoundsGlobal(i) == false) {
    // segment = region.getSegment(i);
    // linked = (RegionSegment) segment.getLinkedSegment();
    // indexer = segment.getData();
    // }
    //
    // final long regional = segment.mapGlobalToRegional(i);
    //
    // final long position = linked.mapRegionalToGlobal(indexer
    // .mapIndexToPositionRegional((int) regional));
    //
    // }
  }

  /**
   * Indexes the entire unsegmented region. The region can not be segmented
   *
   * @param target
   * @throws IOException
   */
  public SoftRegionIndexer(final PartialLoader loader) throws IOException {

    task = new SuperProgressTask("indexer");
    scanTask = task.addTask("scan file for records", loader.getLength());

    this.loader = loader;

    this.calculateFactor(loader);

    if (loader != null) {
      this.lengthGetter = loader.getLengthGetter();
      this.table = this.createIndexTableFromLoader(loader, this.factor);

      logger.trace("table=%d" + this.table.length);
    } else {
      this.lengthGetter = null;
    }

  }

  private void calculateFactor(final PartialLoader loader) throws IOException {
    /*
     * Calculate the soft index factor which determines how many soft indexes we
     * keep per every hard index. For very large files take a sample of packets
     * and calculate average size. For smaller files simply assume the minimum
     * of 30 bytes per packet as average.
     */
    final int ave =
        (loader.getLength() > SoftRegionIndexer.LARGE_FILE_THRESHOLD ? this
            .takeAverageSample(loader, SoftRegionIndexer.LARGE_FILE_SAMPLE)
            : this
                .takeAverageSample(loader, SoftRegionIndexer.SMALL_FILE_SAMPE));

    final long estPacketCount = loader.getLength() / ave;
    this.factor = (int) (estPacketCount / SoftRegionIndexer.MAX_HARD_RECORDS);

    /*
     * Make sure we use a reasonable minimum to be efficient
     */
    if ((this.factor != 0) && (this.factor < SoftRegionIndexer.MIN_FACTOR_SIZE)) {
      this.factor = SoftRegionIndexer.MIN_FACTOR_SIZE;
    }

    logger.trace("size=" + loader.getLength() + ", ave=" + ave + ", est="
        + estPacketCount + ", factor=" + this.factor);
  }

  /**
   * Method will iterate over the entire region full of records and index each
   * record's position in an array. Record positions in regional address space
   * are always constant, therefore once the region is indexed once, we don't
   * have to worry about positions any more.
   *
   * @param loader
   * @param factor
   *          TODO
   * @return
   * @throws IOException
   */
  private IndexTable[] createIndexTableFromLoader(final PartialLoader loader,
      final int factor) throws IOException {

    final BasicRecordIterator iterator =
        new BasicRecordIterator(loader, this.lengthGetter);
    final int capacity = (factor == 0 ? 10000 : factor);

    final List<Long> temp = new ArrayList<Long>(capacity); // Rough estimate
    final List<IndexTable> it = new ArrayList<IndexTable>(100);

    long next = 0;
    long previous = 0;
    while (iterator.hasNext()) {
      final long regional = iterator.getPosition();
      temp.add(regional);
      iterator.skip();

      this.length++;
      if ((factor != 0) && (this.length % factor == 0)) {
        it.add(new SoftTable(temp, loader));
        temp.clear();
      }
     
      if (regional > next) {
        scanTask.update(regional - previous);
        next = regional + 200 * Size.OneMeg.bytes();
       
        previous = regional;
      }
    }
   
    scanTask.finish();

    if (temp.isEmpty() == false) {

      if (factor > 2) {
        it.add(new HardTable(temp));

      } else {
        it.add(new SoftTable(temp, loader));
      }
    }

    /*
     * Now lets turn the list into more efficient array since regions are not
     * mutable we can do that.
     */
    final IndexTable[] array = it.toArray(new IndexTable[it.size()]);

    return array;
  }

  /**
   * @return
   */
  public long getLength() {
    return this.length;
  }

  public long mapIndexToPositionRegional(final int regional) {
    if ((regional < 0) || (regional >= this.length)) {
      throw new IndexOutOfBoundsException("Regional index [" + regional
          + "] is out of bounds [0 - " + (this.length - 1) + "].");
    }

    final int it = (this.factor == 0 ? 0 : regional / this.factor);
    final int index = (this.factor == 0 ? regional : regional % this.factor);

    try {
      final long position = this.table[it].get(index);

      return position;

    } catch (final IOException e) {
      throw new IORuntimeException(e);
    }
  }

  /**
   * @param start
   * @return
   */
  public long mapSRegionalToTRegional(final long sRegional) {
    long r = -1;

    try {
      for (int i = 0; i < this.table.length; i++) {
        if ((r = this.table[i].search(sRegional)) != -1) {
          r += i * this.factor;
        }
      }
    } catch (final IOException e) {
      throw new IORuntimeException(e);
    }

    return r;
  }

  /**
   * Takes a certain sample packet average by reading the first max number of
   * packets.
   *
   * @param loader
   * @param max
   * @return
   * @throws IOException
   */
  private int takeAverageSample(final PartialLoader loader, final int max)
      throws IOException {
    final BasicRecordIterator iterator =
        new BasicRecordIterator(loader, loader.getLengthGetter());

    int count = 0;
    int total = 0;
    while (iterator.hasNext() && (count < max)) {
      final ByteBuffer b = iterator.next();
      total += loader.getLengthGetter().readLength(b);

      count++;
    }


    final int ave = total / count;

    return ave;
  }

  @Override
  public String toString() {
    final StringBuilder b = new StringBuilder("Idx");

    return b.toString();
  }

  /*
   * (non-Javadoc)
   *
   * @see com.slytechs.capture.file.indexer.RegionIndexer#keepInMemory(long,
   *      long)
   */
  public Object keepInMemory(long start, long count) throws IOException {
    count = (start + count <= this.length ? count : this.length - start);
   
    task = new SuperProgressTask("indexer");

    final int first = (this.factor == 0 ? 0 : (int) start / this.factor);
    final int end =
        (this.factor == 0 ? 1 : (int) (start + count) / this.factor) + 1;

    scanTask = task.addTask("locking indexes into memory", end - start);

    final List<Object> list = new ArrayList<Object>(10);

    for (int i = first; i < end; i++) {
      list.add(table[i].keepInMemory(0, 1));
     
      if (i % 1000 == 0) {
        scanTask.update(1000);
      }
    }

    scanTask.finish();
   
    return list.toArray();
  }

}
TOP

Related Classes of com.slytechs.capture.file.indexer.SoftRegionIndexer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.