Package ar.ext.textfile

Source Code of ar.ext.textfile.DelimitedFile

package ar.ext.textfile;

import java.awt.geom.Rectangle2D;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.ForkJoinPool;

import ar.Glyph;
import ar.Glyphset;
import ar.glyphsets.BoundsTask;
import ar.glyphsets.SimpleGlyph;
import ar.glyphsets.implicitgeometry.Indexed;
import ar.glyphsets.implicitgeometry.Indexed.Converter;
import ar.glyphsets.implicitgeometry.Shaper;
import ar.glyphsets.implicitgeometry.Valuer;

import org.supercsv.io.CsvListReader;
import org.supercsv.prefs.CsvPreference;

/**Given a file with line-oriented, regular-expression delimited values,
* provides a list-like (read-only) interface.
*/
public class DelimitedFile<G,I> implements Glyphset<G,I> {
  /**Number of lines to skip by default.  Captured at object creation time.**/
  public static int DEFAULT_SKIP =0;
 
  /**Source file.**/
  private final File source;
 
  /**Segment information for subsets, in terms of file bytes**/
  private final long segStart;
  private final long segEnd;
 
  /**Character used to delimit fields of the rows.**/
  private final char delimiter;
 
  /**Types of the fields.**/
  private final Converter.TYPE[] types;
 
  /**Number of lines to skip at the start of the file.**/
  private final int skip;

  private final Shaper<Indexed,G> shaper;
  private final Valuer<Indexed,I> valuer;

  ///Cached items.
  private long size =-1;
  private Rectangle2D bounds;

   
  /**
   * @param source File to pull from
   * @param delimiter Field delimiter
   * @param types List of field types
   * @param shaper
   * @param valuer
   */
  public DelimitedFile(File source, char delimiter, Converter.TYPE[] types, Shaper<Indexed,G> shaper, Valuer<Indexed, I> valuer) {this(source, delimiter, types, DEFAULT_SKIP, shaper, valuer);}
  public DelimitedFile(File source, char delimiter, Converter.TYPE[] types, int skip, Shaper<Indexed,G> shaper, Valuer<Indexed, I> valuer) {this(source, delimiter, types, skip, shaper, valuer, 0, -1);}
  public DelimitedFile(File source, char delimiter, Converter.TYPE[] types, int skip, Shaper<Indexed,G> shaper, Valuer<Indexed, I> valuer, long segStart, long segEnd) {
    this.source = source;
    this.delimiter = delimiter;
    this.types = types;
    this.skip = skip;
    this.shaper = shaper;
    this.valuer = valuer;
    this.segStart = segStart;
    this.segEnd = segEnd;
  }

 

 
  @Override
  public Rectangle2D bounds() {
    if (bounds == null) {
      bounds = bounds(2);
    }
    return bounds;
  }
 
 
  public Rectangle2D bounds(int m) {
    int procs = Runtime.getRuntime().availableProcessors();
    ForkJoinPool pool = new ForkJoinPool(procs);
    bounds = pool.invoke(new BoundsTask<>(this, m*procs));
    return bounds;
  }
   
  @Override
  public Glyphset<G, I> segmentAt(int count, int segId) throws IllegalArgumentException {
    long stride = (source.length()/count)+1; //+1 for the round-down
    long low = stride*segId;
    long high = Math.min(low+stride, source.length());

    return new DelimitedFile<>(source, delimiter, types, skip, shaper, valuer, low, high);
  }
 
  @Override public boolean isEmpty() {return source.length() == 0;}
  @Override public Iterator iterator() {
    return new Iterator();
  }

  @Override
  public long size() {
    if (size <0) {
      try (BufferedReader r = new BufferedReader(new FileReader(source))) {
        size=0;
        while(r.readLine() != null) {size++;}
      } catch (IOException e) {
        throw new RuntimeException("Error processing file: " + source.getName());
      }
    }
    size = size-skip;
    return size;
  }
 
  private final class Iterator implements java.util.Iterator<Glyph<G,I>> {
    private final Converter conv = new Converter(types);
    private final CsvListReader base;
    private int charsRead;
    private Glyph<G,I> cached;
   
    public Iterator() {
      try {
        FileReader core = new FileReader(source);
        //Get to the first record-start in the segment
        core.skip(segStart);
        base = new CsvListReader(core, CsvPreference.STANDARD_PREFERENCE);

        //Get to the first full record in the segment
        if (segStart == 0) {
          for (long i=skip; i>0; i--) {base.read();}         
        } else {
          base.read();
        }
      } catch (IOException e) {
        throw new RuntimeException("Error initializing iterator for " + source.getName(), e);
      }
    }
   
    @Override
    protected void finalize() {
      try {if (base != null) {base.close();}}
      catch (IOException e) {e.printStackTrace();}
    }
   
    @Override
    public boolean hasNext() {
      cacheNext();
      return cached != null;
    }
   
    @Override
    public Glyph<G,I> next() {
      cacheNext();
      Glyph<G,I> next = this.cached;
      cached = null;
      return next;
    }
   
    private void cacheNext() {
      if (cached != null) {return;}
      if (segEnd > 0 && charsRead > segEnd) {return;}
     
      List<String> next;
      try {next = base.read();}
      catch (IOException e) {throw new RuntimeException(String.format("Error reading around character %d of %s.", charsRead, source.getName()), e);}
      if (next == null) {return;}
     
      try {
        for (String s: next) {charsRead += s.length();} //TODO: Probably not the fastest way to do this...
        Indexed base = conv.applyTo(new Indexed.ListWrapper(next));
        cached = new SimpleGlyph<>(shaper.shape(base), valuer.value(base));
      } catch (Exception e) {throw new RuntimeException(String.format("Error constructing glyph around character %d of %s", charsRead, source.getName()), e);}
    }

    @Override public void remove() {throw new UnsupportedOperationException();}
  }
 
}
TOP

Related Classes of ar.ext.textfile.DelimitedFile

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.