Package edu.isi.karma.cleaning.Research

Source Code of edu.isi.karma.cleaning.Research.Test

package edu.isi.karma.cleaning.Research;

import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Vector;

import au.com.bytecode.opencsv.CSVReader;
import edu.isi.karma.cleaning.DataPreProcessor;
import edu.isi.karma.cleaning.ExampleCluster;
import edu.isi.karma.cleaning.ExampleCluster.method;
import edu.isi.karma.cleaning.ExampleSelection;
import edu.isi.karma.cleaning.GradientDecendOptimizer;
import edu.isi.karma.cleaning.InterpreterType;
import edu.isi.karma.cleaning.Messager;
import edu.isi.karma.cleaning.ProgSynthesis;
import edu.isi.karma.cleaning.ProgramRule;
import edu.isi.karma.cleaning.UtilTools;

public class Test {

  // Checks whether a record's result is the single longest or the single shortest of all records.
  public static boolean visible(HashMap<String, String[]> xHashMap, String Id) {
    String[] pair = xHashMap.get(Id);
    HashMap<String, String> tmp = new HashMap<String, String>();
    try {
      UtilTools.StringColorCode(pair[0], pair[1], tmp);
    } catch (Exception e) {
      tmp.put("Org", pair[0]);
      tmp.put("Tar", "ERROR");
      tmp.put("Orgdis", pair[0]);
      tmp.put("Tardis", "ERROR");
    }
    String tar = tmp.get("Tar");
    int length = tar.length();
    boolean sequalBefore = false;
    boolean lequalBefore = false;
    boolean shortest = true;
    boolean longest = true;
    for (String[] elem : xHashMap.values()) {
      HashMap<String, String> t = new HashMap<String, String>();
      try {
        UtilTools.StringColorCode(elem[0], elem[1], t);
      } catch (Exception ex) {
        tmp.put("Org", elem[0]);
        tmp.put("Tar", "ERROR");
        tmp.put("Orgdis", elem[0]);
        tmp.put("Tardis", "ERROR");
      }
      String tres = tmp.get("Tar");
      int newl = tres.length();
      if (newl >= length) {
        if (lequalBefore)
          longest = false;
        lequalBefore = true;
      }
      if (newl <= length) {
        if (sequalBefore)
          shortest = false;
        sequalBefore = true;
      }
    }
    return (shortest || longest);
  }

  public void parameterSelection(String dirpath) {
    ArrayList<String> selectParams = new ArrayList<String>();
    ArrayList<String> runningInfo = new ArrayList<String>();
    File nf = new File(dirpath);
    File[] allfiles = nf.listFiles();
    double[] r = { 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8 };
    // statistics
    // list all the csv file under the dir
    for (File f : allfiles) {
      if (f.getName().indexOf(".csv") != -1
          && f.getName().indexOf(".csv") == (f.getName().length() - 4)) {
        double minS = Double.MAX_VALUE;
        double bestRatio = 0.0;
        for (int i = 0; i < r.length; i++) {
          GradientDecendOptimizer.ratio = r[i];
          double s = transform(f);
          if (s <= minS) {
            minS = s;
            bestRatio = r[i];

          }
          String runinfo = String.format("%s: ratio: %f, value:%f\n",
              f.getName(), r[i], s);
          runningInfo.add(runinfo);
        }
        String parms = String.format("%s: ratio: %f, BestVal:%f",
            f.getName(), bestRatio, minS);
        selectParams.add(parms);
      }
    }
    System.out.println("" + runningInfo.toString());
    System.out.println(selectParams.toString());
  }

  public double transform(File f) {
    double measure = Double.MAX_VALUE;
    try {
      Vector<String[]> examples = new Vector<String[]>();
      Vector<String[]> addExamples = new Vector<String[]>();
      Vector<String[]> entries = new Vector<String[]>();
      if (f.getName().indexOf(".csv") != -1
          && f.getName().indexOf(".csv") == (f.getName().length() - 4)) {
        // MyLogger.logsth("========"+f.getName()+"============\n");
        HashMap<String, String[]> xHashMap = new HashMap<String, String[]>();
        @SuppressWarnings("resource")
        CSVReader cr = new CSVReader(new FileReader(f), ',', '"', '\0');
        String[] pair;
        int index = 0;
        Vector<String> vtmp = new Vector<String>();
        while ((pair = cr.readNext()) != null) {
          if (pair == null || pair.length <= 1)
            break;
          entries.add(pair);
          vtmp.add(pair[0]);
          String[] line = { pair[0], pair[1], "", "", "wrong" }; // org,
                                      // tar,
                                      // tarcode,
                                      // label
          xHashMap.put(index + "", line);
          index++;
        }
        DataPreProcessor dpp = new DataPreProcessor(vtmp);
        dpp.run();
        Messager msger = new Messager();
        Vector<Vector<String[]>> constraints = new Vector<Vector<String[]>>();
        if (entries.size() <= 1)
          return Double.MAX_VALUE;
        ExampleSelection expsel = new ExampleSelection();
        ExampleSelection.firsttime = true;
        expsel.inite(xHashMap, null);
        int target = Integer.parseInt(expsel.Choose());
        String[] mt = { "<_START>" + entries.get(target)[0] + "<_END>",
            entries.get(target)[1] };
        examples.add(mt);
        ExampleSelection.firsttime = false;
        // accuracy record code
        ArrayList<double[]> accArrayList = new ArrayList<double[]>();
        long stime = System.currentTimeMillis();
        boolean overtime = true;
        while (true) // repeat as no incorrect answer appears.
        {
          if (examples.size() == 4) {
            System.out.println("Hello World");
          }
          if (System.currentTimeMillis() - stime > 600000) {
            overtime = false;
            break;
          }
          long checknumber = 1;
          long iterAfterNoFatalError = 0;
          long isvisible = 0;
          HashMap<String, Vector<String[]>> expFeData = new HashMap<String, Vector<String[]>>();
          Vector<String> resultString = new Vector<String>();
          xHashMap = new HashMap<String, String[]>();
          ProgSynthesis psProgSynthesis = new ProgSynthesis();
          HashMap<String, String> unlabeledData = new HashMap<String, String>();
          for (int i = 0; i < vtmp.size(); i++) {
            unlabeledData.put("" + i, vtmp.get(i));
          }
          psProgSynthesis.inite(examples, dpp, msger);
          Vector<ProgramRule> pls = new Vector<ProgramRule>();
          Collection<ProgramRule> ps = psProgSynthesis.run_main();
          // collect history contraints
          msger.updateCM_Constr(psProgSynthesis.partiCluster
              .getConstraints());
          msger.updateWeights(psProgSynthesis.partiCluster.weights);
          // constraints.addAll();
          if (ps != null) {
            pls.addAll(ps);
          } else {
            System.out.println("Cannot find any rule");
          }
          String[] wexam = null;
          if (pls.size() == 0)
            break;
          long t1 = System.currentTimeMillis();
          int ErrorCnt = 0;
          int clf_acc_error_cnt = 0;
          int clf_acc_total_cnt = 0;
          HashMap<String, HashMap<String, String>> uData = new HashMap<String, HashMap<String, String>>();
          for (int i = 0; i < pls.size(); i++) {
            ProgramRule script = pls.get(i);
            // System.out.println(script);
            String res = "";
            for (int j = 0; j < entries.size(); j++) {
              InterpreterType worker = script
                  .getRuleForValue(entries.get(j)[0]);
              String classlabel = script.getClassForValue(entries
                  .get(j)[0]);
              String tmps = worker
                  .execute_debug(entries.get(j)[0]);
              HashMap<String, String> dict = new HashMap<String, String>();
              dict.put("class", classlabel);
              UtilTools.StringColorCode(entries.get(j)[0], tmps,
                  dict);
              String s = dict.get("Tar");
              if (Test.isExample(entries.get(j)[0], examples)) {
                s = entries.get(j)[1];
              }
              res += s + "\n";
              if (ConfigParameters.debug == 1) {
                String indicator = "wrong";
                if (s.compareTo(entries.get(j)[1]) == 0)
                  indicator = "correct";
                if (!uData.containsKey(j + ""))
                  uData.put(j + "", dict);
              }
              // classifier accuracy

              boolean existres = false;
              for (InterpreterType w : script.rules.values()) {
                String xtmp = w.execute(entries.get(j)[0]);
                if (xtmp != null
                    && xtmp.length() > 0
                    && xtmp.compareTo(entries.get(j)[1]) == 0) {
                  existres = true;
                  break;
                }
              }
              if (existres) {
                clf_acc_total_cnt++;
                if (s == null || s.length() == 0
                    || s.compareTo(entries.get(j)[1]) != 0) {
                  clf_acc_error_cnt++;
                }
              }
              //
              if (s == null || s.length() == 0) {
                String[] ts = {
                    "<_START>" + entries.get(j)[0]
                        + "<_END>", "", tmps,
                    classlabel, "wrong" };
                xHashMap.put(j + "", ts);
                wexam = ts;
                checknumber++;
              }
              boolean isfind = false;
              for (String[] exppair : examples) {
                if (exppair[0].compareTo("<_START>"
                    + dict.get("Org") + "<_END>") == 0) {
                  String[] exp = { dict.get("Org"), tmps };
                  if (!expFeData.containsKey(classlabel)) {
                    Vector<String[]> vstr = new Vector<String[]>();
                    vstr.add(exp);
                    expFeData.put(classlabel, vstr);
                  } else {
                    expFeData.get(classlabel).add(exp);
                  }
                  isfind = true;
                }
              }
              // update positive traing data with user
              // specification
              for (String[] tmpx : addExamples) {
                if (tmpx[0].compareTo(dict.get("Org")) == 0
                    && tmpx[1].compareTo(dict.get("Tar")) == 0) {
                  String[] exp = { dict.get("Org"), tmps };
                  if (!expFeData.containsKey(classlabel)) {
                    Vector<String[]> vstr = new Vector<String[]>();
                    vstr.add(exp);
                    expFeData.put(classlabel, vstr);
                  } else {
                    expFeData.get(classlabel).add(exp);
                  }
                  isfind = true;
                }
              }
              if (!isfind) {
                String[] ts = {
                    "<_START>" + entries.get(j)[0]
                        + "<_END>", s, tmps,
                    classlabel, "right" };
                if (s.compareTo(entries.get(j)[1]) != 0) {
                  ErrorCnt++;
                  wexam = ts;
                  ts[4] = "wrong";
                }
                xHashMap.put(j + "", ts);
              }
            }

            if (wexam == null
                || (entries.size() - ErrorCnt) * 1.0
                    / entries.size() == 1.0) {
              wexam = null;
              break;
            }
            resultString.add(res);
          }
          double resultacc = (entries.size() - ErrorCnt) * 1.0
              / entries.size();
          double clf_acc = 1 - clf_acc_error_cnt * 1.0
              / clf_acc_total_cnt;
          double[] accarray = { resultacc, clf_acc };
          // use uData to refiner the result
          System.out.println("" + psProgSynthesis.myprog.toString());
          System.out.println(pls.get(0).toString());
          // cRefiner.clusterUdata();
          long t2 = System.currentTimeMillis();

          if (wexam != null) {
            String[] wexp = new String[2];
            while (true) {
              expsel = new ExampleSelection();
              expsel.inite(xHashMap, expFeData);
              int e = Integer.parseInt(expsel.Choose());
              // /
              System.out.println("Recommand Example: "
                  + Arrays.toString(xHashMap.get("" + e)));
              // /
              if (xHashMap.get("" + e)[4].compareTo("right") != 0) {
                wexp[0] = "<_START>" + entries.get(e)[0]
                    + "<_END>";
                wexp[1] = entries.get(e)[1];
                if (expsel.isDetectingQuestionableRecord) {
                  iterAfterNoFatalError++;
                  // check whether this record is has the
                  // longest or shortest result
                  Boolean v = visible(xHashMap, "" + e);
                  if (v) {
                    isvisible += 1;
                  }
                }
                break;
              } else {
                // update positive training data
                addExamples.add(entries.get(e));
                // update the rest dataset
                xHashMap.remove("" + e);
              }
              checknumber++;
            }

            examples.add(wexp);

          } else {
            measure = examples.size();
            break;
          }
        }

      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
    return measure;
  }
  public static String test_seq(ArrayList<String[]> exps) {
    String timeres = "";
    HashMap<String, String[]> xHashMap = new HashMap<String, String[]>();
    int index = 0;
    Vector<String> vtmp = new Vector<String>();
    Vector<String[]> examples = new Vector<String[]>();
    for (String[] pair : exps) {
      if (pair == null || pair.length <= 1)
        break;
      String[] line = { pair[0], pair[1], "", "", "wrong" };
      vtmp.add(pair[0]);
      xHashMap.put(index + "", line);
      index++;
    }
    DataPreProcessor dpp = new DataPreProcessor(vtmp);
    dpp.run();
    Messager msger = new Messager();
    Vector<Vector<String[]>> constraints = new Vector<Vector<String[]>>();
    ArrayList<double[]> accArrayList = new ArrayList<double[]>();
    int i = 0;
    while (i<exps.size()) // repeat as no incorrect answer appears.
    {
      String[] tmt = { "<_START>" + exps.get(i)[0] + "<_END>", exps.get(i)[1] };
      examples.add(tmt);
      xHashMap = new HashMap<String, String[]>();
      ProgSynthesis psProgSynthesis = new ProgSynthesis();
      psProgSynthesis.inite(examples, dpp, msger);
      Vector<ProgramRule> pls = new Vector<ProgramRule>();
      long t1 = System.currentTimeMillis();
      Collection<ProgramRule> ps = psProgSynthesis.run_main();
      long span = System.currentTimeMillis()-t1;
      timeres += span+",";
      // collect history contraints
      msger.updateCM_Constr(psProgSynthesis.partiCluster.getConstraints());
      msger.updateWeights(psProgSynthesis.partiCluster.weights);
      i++;
      // constraints.addAll();
      if (ps != null) {
        pls.addAll(ps);
        System.out.println("program: "+ps);
      } else {
        System.out.println("Cannot find any rule");
      }
      if (pls.size() == 0)
        break;
    }
    return timeres;
  }
  public static String test_adaptive_seq(ArrayList<String[]> exps) {
    String timeres = "";
    HashMap<String, String[]> xHashMap = new HashMap<String, String[]>();
    int index = 0;
    Vector<String> vtmp = new Vector<String>();
    Vector<String[]> examples = new Vector<String[]>();
    for (String[] pair : exps) {
      if (pair == null || pair.length <= 1)
        break;
      String[] line = { pair[0], pair[1], "", "", "wrong" };
      vtmp.add(pair[0]);
      xHashMap.put(index + "", line);
      index++;
    }
    DataPreProcessor dpp = new DataPreProcessor(vtmp);
    dpp.run();
    Messager msger = new Messager();
    Vector<Vector<String[]>> constraints = new Vector<Vector<String[]>>();
    ArrayList<double[]> accArrayList = new ArrayList<double[]>();
    int i = 0;
    while (i<exps.size()) // repeat as no incorrect answer appears.
    {
      String[] tmt = { "<_START>" + exps.get(i)[0] + "<_END>", exps.get(i)[1] };
      examples.add(tmt);
      xHashMap = new HashMap<String, String[]>();
      ProgSynthesis psProgSynthesis = new ProgSynthesis();
      psProgSynthesis.inite(examples, dpp, msger);
      Vector<ProgramRule> pls = new Vector<ProgramRule>();
      long t1 = System.currentTimeMillis();
      Collection<ProgramRule> ps = psProgSynthesis.adaptive_main();
      long span = System.currentTimeMillis()-t1;
      timeres += span+",";
      // collect history contraints
      msger.updateCM_Constr(psProgSynthesis.partiCluster.getConstraints());
      msger.updateWeights(psProgSynthesis.partiCluster.weights);
      i++;
      // constraints.addAll();
      if (ps != null) {
        pls.addAll(ps);
        System.out.println("program: "+ps);
      } else {
        System.out.println("Cannot find any rule");
      }
      if (pls.size() == 0)
        break;
    }
    return timeres;
  }

  public static void test3(String dirpath) {
    HashMap<String, Vector<String>> records = new HashMap<String, Vector<String>>();
    File nf = new File(dirpath);
    File[] allfiles = nf.listFiles();
    // statistics
    DataCollection dCollection = new DataCollection();
    // list all the csv file under the dir
    for (File f : allfiles) {
      Vector<String[]> examples = new Vector<String[]>();
      Vector<String[]> addExamples = new Vector<String[]>();
      Vector<String[]> entries = new Vector<String[]>();
      try {
        if (f.getName().indexOf(".csv") != -1
            && f.getName().indexOf(".csv") == (f.getName().length() - 4)) {
          // MyLogger.logsth("========"+f.getName()+"============\n");
          HashMap<String, String[]> xHashMap = new HashMap<String, String[]>();
          @SuppressWarnings("resource")
          CSVReader cr = new CSVReader(new FileReader(f), ',', '"',
              '\0');
          String[] pair;
          int index = 0;
          Vector<String> vtmp = new Vector<String>();
          while ((pair = cr.readNext()) != null) {
            if (pair == null || pair.length <= 1)
              break;
            entries.add(pair);
            vtmp.add(pair[0]);
            String[] line = { pair[0], pair[1], "", "", "wrong" }; // org,
                                        // tar,
                                        // tarcode,
                                        // label
            xHashMap.put(index + "", line);
            index++;
          }
          DataPreProcessor dpp = new DataPreProcessor(vtmp);
          dpp.run();
          Messager msger = new Messager();
          Vector<Vector<String[]>> constraints = new Vector<Vector<String[]>>();
          if (entries.size() <= 1)
            continue;
          ExampleSelection expsel = new ExampleSelection();
          ExampleSelection.firsttime = true;
          expsel.inite(xHashMap, null);
          int target = Integer.parseInt(expsel.Choose());
          String[] mt = {
              "<_START>" + entries.get(target)[0] + "<_END>",
              entries.get(target)[1] };
          examples.add(mt);
          ExampleSelection.firsttime = false;
          // accuracy record code
          ArrayList<double[]> accArrayList = new ArrayList<double[]>();

          while (true) // repeat as no incorrect answer appears.
          {
            long checknumber = 1;
            long iterAfterNoFatalError = 0;
            HashMap<String, Vector<String[]>> expFeData = new HashMap<String, Vector<String[]>>();
            Vector<String> resultString = new Vector<String>();
            xHashMap = new HashMap<String, String[]>();
            ProgSynthesis psProgSynthesis = new ProgSynthesis();
            HashMap<String, String> unlabeledData = new HashMap<String, String>();
            for (int i = 0; i < vtmp.size(); i++) {
              unlabeledData.put("" + i, vtmp.get(i));
            }
            psProgSynthesis.inite(examples, dpp, msger);
            Vector<ProgramRule> pls = new Vector<ProgramRule>();
            Collection<ProgramRule> ps = psProgSynthesis
                .adaptive_main();
            // collect history contraints
            msger.updateCM_Constr(psProgSynthesis.partiCluster
                .getConstraints());
            msger.updateWeights(psProgSynthesis.partiCluster.weights);
            // constraints.addAll();
            if (ps != null) {
              pls.addAll(ps);
            } else {
              System.out.println("Cannot find any rule");
            }
            String[] wexam = null;
            if (pls.size() == 0)
              break;
            long t1 = System.currentTimeMillis();
            int ErrorCnt = 0;
            int clf_acc_error_cnt = 0;
            int clf_acc_total_cnt = 0;
            HashMap<String, HashMap<String, String>> uData = new HashMap<String, HashMap<String, String>>();
            for (int i = 0; i < pls.size(); i++) {
              ProgramRule script = pls.get(i);
              // System.out.println(script);
              String res = "";
              for (int j = 0; j < entries.size(); j++) {
                InterpreterType worker = script
                    .getRuleForValue(entries.get(j)[0]);
                String classlabel = script
                    .getClassForValue(entries.get(j)[0]);
                String tmps = worker.execute_debug(entries
                    .get(j)[0]);
                HashMap<String, String> dict = new HashMap<String, String>();
                dict.put("class", classlabel);
                UtilTools.StringColorCode(entries.get(j)[0],
                    tmps, dict);
                String s = dict.get("Tar");
                if (Test.isExample(entries.get(j)[0], examples)) {
                  s = entries.get(j)[1];
                }
                res += s + "\n";
                if (ConfigParameters.debug == 1) {
                  String indicator = "wrong";
                  if (s.compareTo(entries.get(j)[1]) == 0)
                    indicator = "correct";
                  if (!uData.containsKey(j + ""))
                    uData.put(j + "", dict);
                }
                // classifier accuracy

                boolean existres = false;
                for (InterpreterType w : script.rules.values()) {
                  String xtmp = w.execute(entries.get(j)[0]);
                  if (xtmp != null
                      && xtmp.length() > 0
                      && xtmp.compareTo(entries.get(j)[1]) == 0) {
                    existres = true;
                    break;
                  }
                }
                if (existres) {
                  clf_acc_total_cnt++;
                  if (s == null
                      || s.length() == 0
                      || s.compareTo(entries.get(j)[1]) != 0) {
                    clf_acc_error_cnt++;
                  }
                }
                //
                if (s == null || s.length() == 0) {
                  String[] ts = {
                      "<_START>" + entries.get(j)[0]
                          + "<_END>", "", tmps,
                      classlabel, "wrong" };
                  xHashMap.put(j + "", ts);
                  wexam = ts;
                  checknumber++;
                }
                boolean isfind = false;
                for (String[] exppair : examples) {
                  if (exppair[0].compareTo("<_START>"
                      + dict.get("Org") + "<_END>") == 0) {
                    String[] exp = { dict.get("Org"), tmps };
                    if (!expFeData.containsKey(classlabel)) {
                      Vector<String[]> vstr = new Vector<String[]>();
                      vstr.add(exp);
                      expFeData.put(classlabel, vstr);
                    } else {
                      expFeData.get(classlabel).add(exp);
                    }
                    isfind = true;
                  }
                }
                // update positive traing data with user
                // specification
                for (String[] tmpx : addExamples) {
                  if (tmpx[0].compareTo(dict.get("Org")) == 0
                      && tmpx[1].compareTo(dict
                          .get("Tar")) == 0) {
                    String[] exp = { dict.get("Org"), tmps };
                    if (!expFeData.containsKey(classlabel)) {
                      Vector<String[]> vstr = new Vector<String[]>();
                      vstr.add(exp);
                      expFeData.put(classlabel, vstr);
                    } else {
                      expFeData.get(classlabel).add(exp);
                    }
                    isfind = true;
                  }
                }
                if (!isfind) {
                  String[] ts = {
                      "<_START>" + entries.get(j)[0]
                          + "<_END>", s, tmps,
                      classlabel, "right" };
                  if (s.compareTo(entries.get(j)[1]) != 0) {
                    ErrorCnt++;
                    wexam = ts;
                    ts[4] = "wrong";
                  }
                  xHashMap.put(j + "", ts);
                }
              }

              if (wexam == null
                  || (entries.size() - ErrorCnt) * 1.0
                      / entries.size() == 1.0) {
                wexam = null;
                break;
              }
              resultString.add(res);
            }
            double resultacc = (entries.size() - ErrorCnt) * 1.0
                / entries.size();
            double clf_acc = 1 - clf_acc_error_cnt * 1.0
                / clf_acc_total_cnt;
            double[] accarray = { resultacc, clf_acc };
            // use uData to refiner the result
            System.out.println(""
                + psProgSynthesis.myprog.toString());
            System.out.println(pls.get(0).toString());
            // cRefiner.clusterUdata();
            records.put(f.getName() + examples.size(), resultString);
            long t2 = System.currentTimeMillis();

            if (wexam != null) {
              String[] wexp = new String[2];
              while (true) {
                expsel = new ExampleSelection();
                expsel.inite(xHashMap, expFeData);
                int e = Integer.parseInt(expsel.Choose());
                // /
                System.out
                    .println("Recommand Example: "
                        + Arrays.toString(xHashMap
                            .get("" + e)));
                // /
                if (xHashMap.get("" + e)[4].compareTo("right") != 0) {
                  wexp[0] = "<_START>" + entries.get(e)[0]
                      + "<_END>";
                  wexp[1] = entries.get(e)[1];
                  if (expsel.isDetectingQuestionableRecord) {
                    iterAfterNoFatalError++;
                    // check whether this record is has the
                    // longest or shortest result
                    Boolean v = visible(xHashMap, "" + e);
                  }
                  break;
                } else {
                  // update positive training data
                  addExamples.add(entries.get(e));
                  // update the rest dataset
                  xHashMap.remove("" + e);
                }
                checknumber++;
              }

              examples.add(wexp);
              FileStat fileStat = new FileStat(f.getName() + "",
                  psProgSynthesis.learnspan,
                  psProgSynthesis.genspan, (t2 - t1),
                  examples.size(), examples,
                  psProgSynthesis.partiCluster.failedCnt,
                  checknumber, iterAfterNoFatalError,
                  psProgSynthesis.myprog.partitions.size(),
                  pls.get(0).toString(), resultacc, clf_acc);
              dCollection.addEntry(fileStat);
            } else {
              FileStat fileStat = new FileStat(f.getName() + "",
                  psProgSynthesis.learnspan,
                  psProgSynthesis.genspan, (t2 - t1),
                  examples.size(), examples,
                  psProgSynthesis.partiCluster.failedCnt,
                  checknumber, iterAfterNoFatalError,
                  psProgSynthesis.myprog.partitions.size(),
                  pls.get(0).toString(), resultacc, clf_acc);
              dCollection.addEntry(fileStat);
              dCollection.addSucceededFile(f.getName());
              break;
            }
          }
        }
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }
    dCollection.print();
    dCollection.print1();
    Prober.displayProgram();
    //
  }

  public static void test4(String dirpath) {
    HashMap<String, Vector<String>> records = new HashMap<String, Vector<String>>();
    File nf = new File(dirpath);
    File[] allfiles = nf.listFiles();
    // statistics
    DataCollection dCollection = new DataCollection();
    // list all the csv file under the dir
    for (File f : allfiles) {
      Vector<String[]> examples = new Vector<String[]>();
      Vector<String[]> addExamples = new Vector<String[]>();
      Vector<String[]> entries = new Vector<String[]>();
      try {
        if (f.getName().indexOf(".csv") != -1
            && f.getName().indexOf(".csv") == (f.getName().length() - 4)) {
          // MyLogger.logsth("========"+f.getName()+"============\n");
          HashMap<String, String[]> xHashMap = new HashMap<String, String[]>();
          @SuppressWarnings("resource")
          CSVReader cr = new CSVReader(new FileReader(f), ',', '"',
              '\0');
          String[] pair;
          int index = 0;
          Vector<String> vtmp = new Vector<String>();
          while ((pair = cr.readNext()) != null) {
            if (pair == null || pair.length <= 1)
              break;
            entries.add(pair);
            vtmp.add(pair[0]);
            String[] line = { pair[0], pair[1], "", "", "wrong" }; // org,
                                        // tar,
                                        // tarcode,
                                        // label
            xHashMap.put(index + "", line);
            index++;
          }
          DataPreProcessor dpp = new DataPreProcessor(vtmp);
          dpp.run();
          Messager msger = new Messager();
          Vector<Vector<String[]>> constraints = new Vector<Vector<String[]>>();
          if (entries.size() <= 1)
            continue;
          ExampleSelection expsel = new ExampleSelection();
          ExampleSelection.firsttime = true;
          expsel.inite(xHashMap, null);
          int target = Integer.parseInt(expsel.Choose());
          String[] mt = {
              "<_START>" + entries.get(target)[0] + "<_END>",
              entries.get(target)[1] };
          examples.add(mt);
          ExampleSelection.firsttime = false;
          // accuracy record code
          ArrayList<double[]> accArrayList = new ArrayList<double[]>();
          long stime = System.currentTimeMillis();
          boolean overtime = true;
          while (true) // repeat as no incorrect answer appears.
          {
            if (examples.size() == 8) {
              System.out.println("Hello World");
            }
            if (System.currentTimeMillis() - stime > 600000) {
              overtime = false;
              break;
            }
            long checknumber = 1;
            long iterAfterNoFatalError = 0;
            long isvisible = 0;
            HashMap<String, Vector<String[]>> expFeData = new HashMap<String, Vector<String[]>>();
            Vector<String> resultString = new Vector<String>();
            xHashMap = new HashMap<String, String[]>();
            ProgSynthesis psProgSynthesis = new ProgSynthesis();
            HashMap<String, String> unlabeledData = new HashMap<String, String>();
            for (int i = 0; i < vtmp.size(); i++) {
              unlabeledData.put("" + i, vtmp.get(i));
            }
            psProgSynthesis.inite(examples, dpp, msger);
            Vector<ProgramRule> pls = new Vector<ProgramRule>();
            Collection<ProgramRule> ps = psProgSynthesis.run_main();
            // collect history contraints
            msger.updateCM_Constr(psProgSynthesis.partiCluster
                .getConstraints());
            msger.updateWeights(psProgSynthesis.partiCluster.weights);
            // constraints.addAll();
            if (ps != null) {
              pls.addAll(ps);
            } else {
              System.out.println("Cannot find any rule");
            }
            String[] wexam = null;
            if (pls.size() == 0)
              break;
            long t1 = System.currentTimeMillis();
            int ErrorCnt = 0;
            int clf_acc_error_cnt = 0;
            int clf_acc_total_cnt = 0;
            HashMap<String, HashMap<String, String>> uData = new HashMap<String, HashMap<String, String>>();
            for (int i = 0; i < pls.size(); i++) {
              ProgramRule script = pls.get(i);
              // System.out.println(script);
              String res = "";
              for (int j = 0; j < entries.size(); j++) {
                InterpreterType worker = script
                    .getRuleForValue(entries.get(j)[0]);
                String classlabel = script
                    .getClassForValue(entries.get(j)[0]);
                String tmps = worker.execute_debug(entries
                    .get(j)[0]);
                HashMap<String, String> dict = new HashMap<String, String>();
                dict.put("class", classlabel);
                UtilTools.StringColorCode(entries.get(j)[0],
                    tmps, dict);
                String s = dict.get("Tar");
                if (Test.isExample(entries.get(j)[0], examples)) {
                  s = entries.get(j)[1];
                }
                res += s + "\n";
                if (ConfigParameters.debug == 1) {
                  String indicator = "wrong";
                  if (s.compareTo(entries.get(j)[1]) == 0)
                    indicator = "correct";
                  if (!uData.containsKey(j + ""))
                    uData.put(j + "", dict);
                }
                // classifier accuracy

                boolean existres = false;
                for (InterpreterType w : script.rules.values()) {
                  String xtmp = w.execute(entries.get(j)[0]);
                  if (xtmp != null
                      && xtmp.length() > 0
                      && xtmp.compareTo(entries.get(j)[1]) == 0) {
                    existres = true;
                    break;
                  }
                }
                if (existres) {
                  clf_acc_total_cnt++;
                  if (s == null
                      || s.length() == 0
                      || s.compareTo(entries.get(j)[1]) != 0) {
                    clf_acc_error_cnt++;
                  }
                }
                //
                if (s == null || s.length() == 0) {
                  String[] ts = {
                      "<_START>" + entries.get(j)[0]
                          + "<_END>", "", tmps,
                      classlabel, "wrong" };
                  xHashMap.put(j + "", ts);
                  wexam = ts;
                  checknumber++;
                }
                boolean isfind = false;
                for (String[] exppair : examples) {
                  if (exppair[0].compareTo("<_START>"
                      + dict.get("Org") + "<_END>") == 0) {
                    String[] exp = { dict.get("Org"), tmps };
                    if (!expFeData.containsKey(classlabel)) {
                      Vector<String[]> vstr = new Vector<String[]>();
                      vstr.add(exp);
                      expFeData.put(classlabel, vstr);
                    } else {
                      expFeData.get(classlabel).add(exp);
                    }
                    isfind = true;
                  }
                }
                // update positive traing data with user
                // specification
                for (String[] tmpx : addExamples) {
                  if (tmpx[0].compareTo(dict.get("Org")) == 0
                      && tmpx[1].compareTo(dict
                          .get("Tar")) == 0) {
                    String[] exp = { dict.get("Org"), tmps };
                    if (!expFeData.containsKey(classlabel)) {
                      Vector<String[]> vstr = new Vector<String[]>();
                      vstr.add(exp);
                      expFeData.put(classlabel, vstr);
                    } else {
                      expFeData.get(classlabel).add(exp);
                    }
                    isfind = true;
                  }
                }
                if (!isfind) {
                  String[] ts = {
                      "<_START>" + entries.get(j)[0]
                          + "<_END>", s, tmps,
                      classlabel, "right" };
                  if (s.compareTo(entries.get(j)[1]) != 0) {
                    ErrorCnt++;
                    wexam = ts;
                    ts[4] = "wrong";
                  }
                  xHashMap.put(j + "", ts);
                }
              }

              if (wexam == null
                  || (entries.size() - ErrorCnt) * 1.0
                      / entries.size() == 1.0) {
                wexam = null;
                break;
              }
              resultString.add(res);
            }
            double resultacc = (entries.size() - ErrorCnt) * 1.0
                / entries.size();
            double clf_acc = 1 - clf_acc_error_cnt * 1.0
                / clf_acc_total_cnt;
            double[] accarray = { resultacc, clf_acc };
            // use uData to refiner the result
            System.out.println(""
                + psProgSynthesis.myprog.toString());
            System.out.println(pls.get(0).toString());
            // cRefiner.clusterUdata();
            records.put(f.getName() + examples.size(), resultString);
            long t2 = System.currentTimeMillis();

            if (wexam != null) {
              String[] wexp = new String[2];
              while (true) {
                expsel = new ExampleSelection();
                expsel.inite(xHashMap, expFeData);
                int e = Integer.parseInt(expsel.Choose());
                // /
                System.out
                    .println("Recommand Example: "
                        + Arrays.toString(xHashMap
                            .get("" + e)));
                // /
                if (xHashMap.get("" + e)[4].compareTo("right") != 0) {
                  wexp[0] = "<_START>" + entries.get(e)[0]
                      + "<_END>";
                  wexp[1] = entries.get(e)[1];
                  if (expsel.isDetectingQuestionableRecord) {
                    iterAfterNoFatalError++;
                    // check whether this record is has the
                    // longest or shortest result
                    Boolean v = visible(xHashMap, "" + e);
                    if (v) {
                      isvisible += 1;
                    }
                  }
                  break;
                } else {
                  // update positive training data
                  addExamples.add(entries.get(e));
                  // update the rest dataset
                  xHashMap.remove("" + e);
                }
                checknumber++;
              }

              examples.add(wexp);
              FileStat fileStat = new FileStat(f.getName() + "",
                  psProgSynthesis.learnspan,
                  psProgSynthesis.genspan, (t2 - t1),
                  examples.size(), examples,
                  psProgSynthesis.partiCluster.failedCnt,
                  checknumber, iterAfterNoFatalError,
                  psProgSynthesis.myprog.partitions.size(),
                  pls.get(0).toString(), resultacc, clf_acc);
              dCollection.addEntry(fileStat);
            } else {
              FileStat fileStat = new FileStat(f.getName() + "",
                  psProgSynthesis.learnspan,
                  psProgSynthesis.genspan, (t2 - t1),
                  examples.size(), examples,
                  psProgSynthesis.partiCluster.failedCnt,
                  checknumber, iterAfterNoFatalError,
                  psProgSynthesis.myprog.partitions.size(),
                  pls.get(0).toString(), resultacc, clf_acc);
              dCollection.addEntry(fileStat);
              dCollection.addSucceededFile(f.getName());
              break;
            }
          }
        }
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }
    dCollection.print();
    dCollection.print1();
    Prober.displayProgram();
    //
  }

  public static boolean isExample(String var, Vector<String[]> examples) {
    boolean is = false;
    String s = "<_START>" + var + "<_END>";
    for (String[] x : examples) {
      if (s.compareTo(x[0]) == 0) {
        is = true;
        break;
      }
    }
    return is;
  }

  public static void hashResultPrint(HashMap<String, Vector<String>> res) {
    String s = "";
    for (String key : res.keySet()) {
      s += "==============" + key + "=============\n";
      for (String value : res.get(key)) {
        s += value + "\n";
      }
    }
    System.out.println("" + s);
  }

  public static void runSeriseExper() {
    ExampleCluster.option = method.DPIC;
    Test.test4("/Users/bowu/Research/testdata/TestSingleFile");
    ExampleCluster.option = method.DPIC;
    Test.test4("/Users/bowu/Research/testdata/TestSingleFile");
    ExampleCluster.option = method.DP;
    Test.test4("/Users/bowu/Research/testdata/TestSingleFile");
    ExampleCluster.option = method.SPIC;
    Test.test4("/Users/bowu/Research/testdata/TestSingleFile");
    ExampleCluster.option = method.SP;
    Test.test4("/Users/bowu/Research/testdata/TestSingleFile");
    ExampleCluster.option = method.CPIC;
    Test.test4("/Users/bowu/Research/testdata/TestSingleFile");
    ExampleCluster.option = method.CP;
    Test.test4("/Users/bowu/Research/testdata/TestSingleFile");
  }

  // Shared, mutable class-level state. Nothing in this part of the file reads
  // or writes these fields.
  // NOTE(review): presumably these track the largest / smallest example count
  // seen during an experiment and the example sets that produced them;
  // confirm against the code that updates them.

  // Starts at -1.
  public static int MaximalNumber = -1;
  // Starts at 100.
  public static int MinimalNumber = 100;
  // Example pairs associated with MaximalNumber (assumption; see note above).
  public static Vector<String[]> larexamples = new Vector<String[]>();
  // Example pairs associated with MinimalNumber (assumption; see note above).
  public static Vector<String[]> smalexamples = new Vector<String[]>();

  public static void main(String[] args) {
    // load parameters
    ConfigParameters cfg = new ConfigParameters();
    cfg.initeParameters();
    DataCollection.config = cfg.getString();
    Test.test4("/Users/bowu/Research/testdata/TestSingleFile");
    //Test.test3("/Users/bowu/Research/testdata/TestSingleFile");
    // Test.runSeriseExper();
    // Test test = new Test();
    // test.parameterSelection("/Users/bowu/Research/testdata/TestSingleFile");
  }
}
TOP

Related Classes of edu.isi.karma.cleaning.Research.Test

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.