Package org.rascalmpl.library.lang.csv

Source Code of org.rascalmpl.library.lang.csv.IOCompiled

package org.rascalmpl.library.lang.csv;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

import org.eclipse.imp.pdb.facts.IBool;
import org.eclipse.imp.pdb.facts.IConstructor;
import org.eclipse.imp.pdb.facts.IList;
import org.eclipse.imp.pdb.facts.IListWriter;
import org.eclipse.imp.pdb.facts.ISet;
import org.eclipse.imp.pdb.facts.ISourceLocation;
import org.eclipse.imp.pdb.facts.IString;
import org.eclipse.imp.pdb.facts.ITuple;
import org.eclipse.imp.pdb.facts.IValue;
import org.eclipse.imp.pdb.facts.IValueFactory;
import org.eclipse.imp.pdb.facts.IWriter;
import org.eclipse.imp.pdb.facts.exceptions.FactParseError;
import org.eclipse.imp.pdb.facts.exceptions.UnexpectedTypeException;
import org.eclipse.imp.pdb.facts.io.StandardTextReader;
import org.eclipse.imp.pdb.facts.type.DefaultTypeVisitor;
import org.eclipse.imp.pdb.facts.type.Type;
import org.eclipse.imp.pdb.facts.type.TypeFactory;
import org.eclipse.imp.pdb.facts.type.TypeStore;
import org.rascalmpl.interpreter.TypeReifier;
import org.rascalmpl.interpreter.utils.RuntimeExceptionFactory;
import org.rascalmpl.library.experiments.Compiler.RVM.Interpreter.RascalExecutionContext;
import org.rascalmpl.library.experiments.Compiler.RVM.Interpreter.Types;
import org.rascalmpl.unicode.UnicodeOutputStreamWriter;
import org.rascalmpl.uri.URIUtil;

public class IOCompiled extends IO {
  // Shared factory for the types built in this class (value, void, string, tuple and set types).
  private static final TypeFactory types = TypeFactory.getInstance();
  // Used by computeType to turn the Type of a read result into an IValue.
  private Types types2;
 
  // Factory through which every value produced by this class is constructed.
  private final IValueFactory values;
  // Text reader used by parseRecordFields for fields that are not parsed as string, int or real directly.
  private final StandardTextReader pdbReader;
  private int separator;    // The separator to be used between fields
  private boolean header;    // Does the file start with a line defining field names?

  // Used by read to convert the requested-type constructor into a Type.
  private TypeReifier tr;
 
  public IOCompiled(IValueFactory values){
    super(values);
   
    this.values = values;
    this.types2 = new Types(values);
    this.tr = new TypeReifier(values);
    separator = ',';
    header = true;
    this.pdbReader = new StandardTextReader();
  }
 
  private void setOptions(IBool header, IString separator) {
    this.separator = separator == null ? ',' : separator.charAt(0);
    this.header = header == null ? true : header.getValue();
  }
 
  /*
   * Read a CSV file
   */
  public IValue readCSV(ISourceLocation loc, IBool header, IString separator, IString encoding, RascalExecutionContext rex){
    return read(null, loc, header, separator, encoding, rex);
  }
 
  public IValue readCSV(IValue result, ISourceLocation loc, IBool header, IString separator, IString encoding, RascalExecutionContext rex){
    return read(result, loc, header, separator, encoding, rex);
  }


  /*
   * Calculate the type of the contents of a CSV file, returned as a value that represents that type
   */
  public IValue getCSVType(ISourceLocation loc, IBool header, IString separator, IString encoding, RascalExecutionContext rex){
    return computeType(loc, header, separator, encoding, rex);
  }
 
  //////
 
  private IValue read(IValue resultTypeConstructor, ISourceLocation loc, IBool header, IString separator, IString encoding, RascalExecutionContext rex) {
    setOptions(header, separator);
    Type resultType = types.valueType();
    TypeStore store = new TypeStore();
    if (resultTypeConstructor != null && resultTypeConstructor instanceof IConstructor) {
      resultType = tr.valueToType((IConstructor)resultTypeConstructor, store);
    }
    System.err.println("resultTypeConstructor = " + resultTypeConstructor);
    System.err.println("resultType = " + resultType);
    Type actualType = resultType;
    while (actualType.isAliased()) {
      actualType = actualType.getAliased();
    }
    Reader reader = null;
    try {
      reader = rex.getResolverRegistry().getCharacterReader(loc.getURI(), encoding.getValue());
      if (actualType.isTop()) {
        return readInferAndBuild(reader, store, rex);
      }
      else {
        return readAndBuild(reader, actualType, store, rex);
      }
    }
    catch (IOException e){
      throw RuntimeExceptionFactory.io(values.string(e.getMessage()), null, null);
    }
    finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e){
          throw RuntimeExceptionFactory.io(values.string(e.getMessage()), null, null);
        }
      }
    }
  }


  private IValue computeType(ISourceLocation loc, IBool header, IString separator, IString encoding, RascalExecutionContext rex) {
    IValue csvResult = this.read(null, loc, header, separator, encoding, rex);
    return types2.typeToValue(csvResult.getType(), rex);
  }
 
  private String[] readFirstRecord(FieldReader reader) throws IOException {
    List<String> result = new ArrayList<String>();
    if (reader.hasRecord()) {
      while (reader.hasField()) {
        result.add(reader.getField());
      }
      return result.toArray(new String[0]);
    }
    return new String[0];
  }

  private void collectFields(FieldReader reader, final String[] currentRecord, RascalExecutionContext rex) throws IOException {
    int recordIndex = 0;
    while (reader.hasField()) {
      if (recordIndex < currentRecord.length) {
        currentRecord[recordIndex++] = reader.getField();
      }
      else {
        recordIndex++;  // count how many collumns to many we have
      }
    }
    if (recordIndex != currentRecord.length) {
      throw RuntimeExceptionFactory.illegalTypeArgument("Arities of actual type and requested type are different (expected: " + currentRecord.length + ", found: " + recordIndex + ")", null, null);
    }
  }

  private IValue readInferAndBuild(Reader stream, TypeStore store, RascalExecutionContext rex) throws IOException {
    FieldReader reader = new FieldReader(stream, separator);

    boolean first = header;
    final String[] currentRecord = readFirstRecord(reader);

    final String[] labels = new String[currentRecord.length];
    for (int l =0; l < labels.length; l++) {
      if (header) {
        labels[l] = normalizeLabel(currentRecord[l], l);
      }
      else {
        labels[l] = "field" + l;
      }
    }

    final Type[] expectedTypes = new Type[currentRecord.length];
    Arrays.fill(expectedTypes, types.valueType());
   
    final Type[] currentTypes = new Type[currentRecord.length];
    Arrays.fill(currentTypes, types.voidType());

    List<IValue[]> records = new LinkedList<>();

    do {
      if (first) {
        first = false;
        continue;
      }
      collectFields(reader, currentRecord, rex);

      IValue[] tuple = new IValue[currentRecord.length];
      parseRecordFields(currentRecord, expectedTypes, store, tuple, false, rex);
      records.add(tuple);

      // update inferred type
      for (int i = 0; i < currentTypes.length; i++){
        if (tuple[i] == null) continue;
        currentTypes[i] = currentTypes[i].lub(tuple[i].getType());
        if(currentTypes[i].isTop()) {
          currentTypes[i] = types.stringType();
        }
      }
    } while(reader.hasRecord());
   
    // done reading records, lets infer and build
    for (int i = 0; i < currentTypes.length; i++){
      if(currentTypes[i].isBottom()) {
        // in case of an completely empty row
        currentTypes[i] = types.stringType();
      }
    }
    Type tupleType = types.tupleType(currentTypes, labels);
    Type resultType = types.setType(tupleType);
    rex.getStdOut().println("readCSV inferred the relation type: " + resultType);
    rex.getStdOut().flush();
   
    IWriter result = values.setWriter();
    for (IValue[] rec : records) {
      result.insert(createTuple(tupleType, rec));
    }
    return result.done();
  }
  private IValue createTuple(Type tupleType, IValue[] rec) {
    // do some fixing of empties and value => string
    for (int i = 0 ; i < rec.length; i++) {
      if (rec[i] == null) {
        rec[i] = defaultValue(tupleType.getFieldType(i));
      }
      else if (tupleType.getFieldType(i).isString() && !rec[i].getType().isString()) {
        rec[i] = values.string(rec[i].toString());
      }
    }
    return values.tuple(tupleType, rec);
  }

  private IValue readAndBuild(Reader stream, Type actualType, TypeStore store, RascalExecutionContext rex) throws IOException {
    FieldReader reader = new FieldReader(stream, separator);
    IWriter result = actualType.isListRelation() ? values.listWriter() : values.setWriter();

    boolean first = header;
    Type tupleType = actualType.getElementType();
    Type[] expectedTypes = new Type[tupleType.getArity()];
    for (int i = 0; i < expectedTypes.length; i++) {
      expectedTypes[i] = tupleType.getFieldType(i);
    }

    final String[] currentRecord = new String[expectedTypes.length];
    final IValue[] tuple = new IValue[expectedTypes.length];
    while (reader.hasRecord()) {
      collectFields(reader, currentRecord, rex);
      if (first) {
        first = false;
        continue;
      }
      parseRecordFields(currentRecord, expectedTypes, store, tuple, true, rex);
      if (result instanceof IListWriter) {
        ((IListWriter)result).append(values.tuple(tuple));
      }
      else {
        result.insert(values.tuple(tuple));
      }
    }
    return result.done();
  }

  private void parseRecordFields(final String[] fields, final Type[] expectedTypes, TypeStore store, IValue[] result, boolean replaceEmpty, final RascalExecutionContext rex) throws IOException  {
    for (int i=0; i < fields.length; i++) {
      final String field = fields[i];
      final Type currentType = expectedTypes[i];

      if (field.isEmpty()) {
        if (replaceEmpty) {
          result[i] = defaultValue(currentType);
        }
        else {
          result[i] = null;
        }
        continue;
      }

      result[i] = currentType.accept(new DefaultTypeVisitor<IValue, RuntimeException>((IValue)null) {
        @Override
        public IValue visitString(Type type) throws RuntimeException {
          return values.string(field);
        }
        @Override
        public IValue visitInteger(Type type) throws RuntimeException {
          try {
            return values.integer(field);
          }
          catch (NumberFormatException nfe) {
            throw RuntimeExceptionFactory.illegalTypeArgument(currentType.toString(),null, null, "Invalid int \"" + field + "\" for requested field " + currentType);
          }
        }
        @Override
        public IValue visitReal(Type type) throws RuntimeException {
          try {
            return values.real(field);
          }
          catch (NumberFormatException nfe) {
            throw RuntimeExceptionFactory.illegalTypeArgument("Invalid real \"" + field + "\" for requested field " + currentType, null, null);
          }
        }

      });
     
      if (result[i] == null) {
        // use pdb reader to deserialize the other types
        StringReader in = new StringReader(field);
        try {
          result[i] = pdbReader.read(values, store, currentType, in);
          if (currentType.isTop() && result[i].getType().isString()) {
            result[i] = values.string(field);
          }
        }
        catch (UnexpectedTypeException ute) {
          throw RuntimeExceptionFactory.illegalTypeArgument("Invalid field \"" + field + "\" (" + ute.getExpected() + ") for requested field " + ute.getGiven(), null, null);
        }
        catch (FactParseError ex) {
          if (currentType.isTop()) {
            result[i] = values.string(field);
          }
          else {
            throw RuntimeExceptionFactory.illegalTypeArgument("Invalid field \"" + field + "\" is not a " + currentType, null, null);
          }
        }
        finally {
          in.close();
        }
      }
    }
  }

  private IValue defaultValue(Type targetType) {
    IValue result = targetType.accept(new DefaultTypeVisitor<IValue, RuntimeException>(null) {
      @Override
      public IValue visitBool(Type type) throws RuntimeException {
        return values.bool(false);
      }
      @Override
      public IValue visitDateTime(Type type) throws RuntimeException {
        return values.datetime(-1);
      }
      @Override
      public IValue visitInteger(Type type) throws RuntimeException {
        return values.integer(0);
      }
      @Override
      public IValue visitList(Type type) throws RuntimeException {
        return values.list(type.getElementType());
      }
      @Override
      public IValue visitMap(Type type) throws RuntimeException {
        return values.mapWriter().done();
      }
      @Override
      public IValue visitNumber(Type type) throws RuntimeException {
        return values.integer(0);
      }
      @Override
      public IValue visitRational(Type type) throws RuntimeException {
        return values.rational(0, 0);
      }
      @Override
      public IValue visitReal(Type type) throws RuntimeException {
        return values.real(0);
      }
      @Override
      public IValue visitSet(Type type) throws RuntimeException {
        return values.set(type.getElementType());
      }
      @Override
      public IValue visitSourceLocation(Type type) throws RuntimeException {
        return values.sourceLocation(URIUtil.invalidURI());
      }
      @Override
      public IValue visitString(Type type) throws RuntimeException {
        return values.string("");
      }
      @Override
      public IValue visitTuple(Type type) throws RuntimeException {
        IValue[] elems = new IValue[type.getArity()];
        for (int i =0; i < elems.length; i++) {
          elems[i] = type.getFieldType(i).accept(this);
          if (elems[i] == null)
            return null;
        }
        return values.tuple(elems);
      }
      @Override
      public IValue visitValue(Type type) throws RuntimeException {
        return values.string("");
      }
    });
    if (result != null)
      return result;
    throw RuntimeExceptionFactory.illegalTypeArgument("Cannot create a default value for an empty field of type " + targetType, null, null);
  }
 
 
  /*
   * Write a CSV file.
   */
 
  public void writeCSV(IValue rel, ISourceLocation loc, IBool header, IString separator, IString encoding, RascalExecutionContext rex){
    String sep = separator != null ? separator.getValue() : ",";
    Boolean head = header != null ? header.getValue() : true;
    Writer out = null;
   
    //Type paramType = ctx.getCurrentEnvt().getTypeBindings().get(types.parameterType("T"));
   
    Type paramType = rel.getType();
    if(!paramType.isRelation() && !paramType.isListRelation()){
      throw RuntimeExceptionFactory.illegalTypeArgument("A relation type is required instead of " + paramType, null, null);
    }
   
    try{
      boolean isListRel = rel instanceof IList;
      out = new UnicodeOutputStreamWriter(rex.getResolverRegistry().getOutputStream(loc.getURI(), false), encoding.getValue(), false);
      out = new BufferedWriter(out); // performance
      ISet irel = null;
      IList lrel = null;
      if (isListRel) {
        lrel = (IList)rel;
      }
      else {
        irel = (ISet) rel;
      }
     
      int nfields = isListRel ? lrel.asRelation().arity() : irel.asRelation().arity();
      if(head){
        for(int i = 0; i < nfields; i++){
          if(i > 0)
            out.write(sep);
          String label = paramType.getFieldName(i);
          if(label == null || label.isEmpty())
            label = "field" + i;
          out.write(label);
        }
        out.write('\n');
      }
     
      Pattern escapingNeeded = Pattern.compile("[\\n\\r\"\\x" + Integer.toHexString(separator.charAt(0)) + "]");
     
      for(IValue v : (isListRel ? lrel : irel)){
        ITuple tup = (ITuple) v;
        boolean firstTime = true;
        for(IValue w : tup){
          if(firstTime)
            firstTime = false;
          else
            out.write(sep);

          String s;
          if(w.getType().isString()){
            s = ((IString)w).getValue();
          }
          else {
            s = w.toString();
          }
          if(escapingNeeded.matcher(s).find()){
            s = s.replaceAll("\"", "\"\"");
            out.write('"');
            out.write(s);
            out.write('"');
          } else
            out.write(s);
        }
        out.write('\n');
      }
    }
    catch(IOException e){
      throw RuntimeExceptionFactory.io(values.string(e.getMessage()), null, null);
    }finally{
      if(out != null){
        try{
          out.flush();
          out.close();
        }catch(IOException ioex){
          throw RuntimeExceptionFactory.io(values.string(ioex.getMessage()), null, null);
        }
      }
    }
  }
 
  /**
   * Normalize a label in the header for use in the relation type.
   * The name is escaped to avoid conflicts with Rascal keywords.
   * @param label  The string found in the header
   * @param pos  Position in the header
   * @return    The label (with non-fieldname characters removed) or "field<pos>" when empty
   */
  private String normalizeLabel(String label, int pos){
    label = label.replaceAll("[^a-zA-Z0-9]+", "");
    if(label.isEmpty())
      return "field" + pos;
    else
      return "\\" + label;
  }
}
TOP

Related Classes of org.rascalmpl.library.lang.csv.IOCompiled

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.