Package edu.isi.karma.cleaning.Research

Source Code of edu.isi.karma.cleaning.Research.TimerRunner

package edu.isi.karma.cleaning.Research;

import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.TimerTask;
import java.util.Vector;
import java.util.concurrent.Callable;

import au.com.bytecode.opencsv.CSVReader;
import edu.isi.karma.cleaning.DataPreProcessor;
import edu.isi.karma.cleaning.ExampleSelection;
import edu.isi.karma.cleaning.InterpreterType;
import edu.isi.karma.cleaning.Messager;
import edu.isi.karma.cleaning.ProgSynthesis;
import edu.isi.karma.cleaning.ProgramRule;
import edu.isi.karma.cleaning.UtilTools;

public class TimerRunner implements Runnable {
  File f;
  DataCollection dCollection;

  public TimerRunner(File f, DataCollection dCollection) {
    this.f = f;
    this.dCollection = dCollection;
  }

  public void run() {
    try {
      HashMap<String, Vector<String>> records = new HashMap<String, Vector<String>>();
      Vector<String[]> examples = new Vector<String[]>();
      Vector<String[]> addExamples = new Vector<String[]>();
      Vector<String[]> entries = new Vector<String[]>();
      // TODO Auto-generated method stub
      if (f.getName().indexOf(".csv") != -1
          && f.getName().indexOf(".csv") == (f.getName().length() - 4)) {
        // MyLogger.logsth("========"+f.getName()+"============\n");
        HashMap<String, String[]> xHashMap = new HashMap<String, String[]>();
        @SuppressWarnings("resource")
        CSVReader cr = new CSVReader(new FileReader(f), ',', '"', '\0');
        String[] pair;
        int index = 0;
        Vector<String> vtmp = new Vector<String>();
        while ((pair = cr.readNext()) != null) {
          if (pair == null || pair.length <= 1)
            break;
          entries.add(pair);
          vtmp.add(pair[0]);
          String[] line = { pair[0], pair[1], "", "", "wrong" }; // org,
                                      // tar,
                                      // tarcode,
                                      // label
          xHashMap.put(index + "", line);
          index++;
        }
        DataPreProcessor dpp = new DataPreProcessor(vtmp);
        dpp.run();
        Messager msger = new Messager();
        Vector<Vector<String[]>> constraints = new Vector<Vector<String[]>>();
        if (entries.size() <= 1)
          return;
        ExampleSelection expsel = new ExampleSelection();
        ExampleSelection.firsttime = true;
        expsel.inite(xHashMap, null);
        int target = Integer.parseInt(expsel.Choose());
        String[] mt = { "<_START>" + entries.get(target)[0] + "<_END>",
            entries.get(target)[1] };
        examples.add(mt);
        ExampleSelection.firsttime = false;
        // accuracy record code
        ArrayList<double[]> accArrayList = new ArrayList<double[]>();
        while (true) // repeat as no incorrect answer appears.
        {
          if (examples.size() == 8) {
            System.out.println("Hello World");
          }
          long checknumber = 1;
          long iterAfterNoFatalError = 0;
          long isvisible = 0;
          HashMap<String, Vector<String[]>> expFeData = new HashMap<String, Vector<String[]>>();
          Vector<String> resultString = new Vector<String>();
          xHashMap = new HashMap<String, String[]>();
          ProgSynthesis psProgSynthesis = new ProgSynthesis();
          HashMap<String, String> unlabeledData = new HashMap<String, String>();
          for (int i = 0; i < vtmp.size(); i++) {
            unlabeledData.put("" + i, vtmp.get(i));
          }
          psProgSynthesis.inite(examples, dpp, msger);
          Vector<ProgramRule> pls = new Vector<ProgramRule>();
          Collection<ProgramRule> ps = psProgSynthesis.run_main();
          // collect history contraints
          msger.updateCM_Constr(psProgSynthesis.partiCluster
              .getConstraints());
          msger.updateWeights(psProgSynthesis.partiCluster.weights);
          // constraints.addAll();
          if (ps != null) {
            pls.addAll(ps);
          } else {
            System.out.println("Cannot find any rule");
          }
          String[] wexam = null;
          if (pls.size() == 0)
            break;
          long t1 = System.currentTimeMillis();
          int ErrorCnt = 0;
          int clf_acc_error_cnt = 0;
          int clf_acc_total_cnt = 0;
          HashMap<String, HashMap<String, String>> uData = new HashMap<String, HashMap<String, String>>();
          for (int i = 0; i < pls.size(); i++) {
            ProgramRule script = pls.get(i);
            // System.out.println(script);
            String res = "";
            for (int j = 0; j < entries.size(); j++) {
              InterpreterType worker = script
                  .getRuleForValue(entries.get(j)[0]);
              String classlabel = script.getClassForValue(entries
                  .get(j)[0]);
              String tmps = worker
                  .execute_debug(entries.get(j)[0]);
              HashMap<String, String> dict = new HashMap<String, String>();
              dict.put("class", classlabel);
              UtilTools.StringColorCode(entries.get(j)[0], tmps,
                  dict);
              String s = dict.get("Tar");
              res += s + "\n";
              if (ConfigParameters.debug == 1) {
                String indicator = "wrong";
                if (s.compareTo(entries.get(j)[1]) == 0)
                  indicator = "correct";
                if (!uData.containsKey(j + ""))
                  uData.put(j + "", dict);
              }
              // classifier accuracy

              boolean existres = false;
              for (InterpreterType w : script.rules.values()) {
                String xtmp = w.execute(entries.get(j)[0]);
                if (xtmp != null
                    && xtmp.length() > 0
                    && xtmp.compareTo(entries.get(j)[1]) == 0) {
                  existres = true;
                  break;
                }
              }
              if (existres) {
                clf_acc_total_cnt++;
                if (s == null || s.length() == 0
                    || s.compareTo(entries.get(j)[1]) != 0) {
                  clf_acc_error_cnt++;
                }
              }
              //
              if (s == null || s.length() == 0) {
                String[] ts = {
                    "<_START>" + entries.get(j)[0]
                        + "<_END>", "", tmps,
                    classlabel, "wrong" };
                xHashMap.put(j + "", ts);
                wexam = ts;
                checknumber++;
              }
              boolean isfind = false;
              for (String[] exppair : examples) {
                if (exppair[0].compareTo("<_START>"
                    + dict.get("Org") + "<_END>") == 0) {
                  String[] exp = { dict.get("Org"), tmps };
                  if (!expFeData.containsKey(classlabel)) {
                    Vector<String[]> vstr = new Vector<String[]>();
                    vstr.add(exp);
                    expFeData.put(classlabel, vstr);
                  } else {
                    expFeData.get(classlabel).add(exp);
                  }
                  isfind = true;
                }
              }
              // update positive traing data with user
              // specification
              for (String[] tmpx : addExamples) {
                if (tmpx[0].compareTo(dict.get("Org")) == 0
                    && tmpx[1].compareTo(dict.get("Tar")) == 0) {
                  String[] exp = { dict.get("Org"), tmps };
                  if (!expFeData.containsKey(classlabel)) {
                    Vector<String[]> vstr = new Vector<String[]>();
                    vstr.add(exp);
                    expFeData.put(classlabel, vstr);
                  } else {
                    expFeData.get(classlabel).add(exp);
                  }
                  isfind = true;
                }
              }
              if (!isfind) {
                String[] ts = {
                    "<_START>" + entries.get(j)[0]
                        + "<_END>", s, tmps,
                    classlabel, "right" };
                if (s.compareTo(entries.get(j)[1]) != 0) {
                  ErrorCnt++;
                  wexam = ts;
                  ts[4] = "wrong";
                }
                xHashMap.put(j + "", ts);
              }
            }

            if (wexam == null
                || (entries.size() - ErrorCnt) * 1.0
                    / entries.size() == 1.0) {
              wexam = null;
              break;
            }
            resultString.add(res);
          }
          double resultacc = (entries.size() - ErrorCnt) * 1.0
              / entries.size();
          double clf_acc = 1 - clf_acc_error_cnt * 1.0
              / clf_acc_total_cnt;
          double[] accarray = { resultacc, clf_acc };
          // use uData to refiner the result
          System.out.println("" + psProgSynthesis.myprog.toString());
          System.out.println(pls.get(0).toString());
          // cRefiner.clusterUdata();
          records.put(f.getName() + examples.size(), resultString);
          long t2 = System.currentTimeMillis();

          if (wexam != null) {
            String[] wexp = new String[2];
            while (true) {
              expsel = new ExampleSelection();
              expsel.inite(xHashMap, expFeData);
              int e = Integer.parseInt(expsel.Choose());
              // /
              System.out.println("Recommand Example: "
                  + Arrays.toString(xHashMap.get("" + e)));
              // /
              if (xHashMap.get("" + e)[4].compareTo("right") != 0) {
                wexp[0] = "<_START>" + entries.get(e)[0]
                    + "<_END>";
                wexp[1] = entries.get(e)[1];
                break;
              } else {
                // update positive training data
                addExamples.add(entries.get(e));
                // update the rest dataset
                xHashMap.remove("" + e);
              }
              checknumber++;
            }

            examples.add(wexp);
            FileStat fileStat = new FileStat(f.getName() + "",
                psProgSynthesis.learnspan,
                psProgSynthesis.genspan, (t2 - t1),
                examples.size(), examples,
                psProgSynthesis.partiCluster.failedCnt,
                checknumber, iterAfterNoFatalError, isvisible,
                pls.get(0).toString(), resultacc, clf_acc);
            dCollection.addEntry(fileStat);
          } else {
            FileStat fileStat = new FileStat(f.getName() + "",
                psProgSynthesis.learnspan,
                psProgSynthesis.genspan, (t2 - t1),
                examples.size(), examples,
                psProgSynthesis.partiCluster.failedCnt,
                checknumber, iterAfterNoFatalError, isvisible,
                pls.get(0).toString(), resultacc, clf_acc);
            dCollection.addEntry(fileStat);
            dCollection.addSucceededFile(f.getName());
            break;
          }
        }
      }
      return;
    } catch (Exception ex) {
      System.out.println("" + ex.toString());
      return;
    }
  }

}
TOP

Related Classes of edu.isi.karma.cleaning.Research.TimerRunner

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.