Examples of JobClient

Examples of org.apache.hadoop.mapred.JobClient

      CommandLineUtil.printHelp(group);
    }
  }
 
  public static void runJob(String input, String output) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(org.apache.mahout.clustering.syntheticcontrol.meanshift.InputDriver.class);
   
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(MeanShiftCanopy.class);
   
    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(output));
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapperClass(org.apache.mahout.clustering.syntheticcontrol.meanshift.InputMapper.class);
    conf.setReducerClass(Reducer.class);
    conf.setNumReduceTasks(0);
   
    client.setConf(conf);
    JobClient.runJob(conf);
  }
View Full Code Here
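
The snippet above is the classic org.apache.hadoop.mapred driver idiom: build a JobConf, make the job map-only with setNumReduceTasks(0), and submit it with the static JobClient.runJob(conf). Note that the JobClient instance created first is configured but plays no part in the actual submission, which goes through the static call. A minimal self-contained sketch of the same pattern, using Hadoop's stock IdentityMapper in place of the Mahout classes (the class name and paths are placeholders):

  import java.io.IOException;

  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.FileInputFormat;
  import org.apache.hadoop.mapred.FileOutputFormat;
  import org.apache.hadoop.mapred.JobClient;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapred.SequenceFileOutputFormat;
  import org.apache.hadoop.mapred.lib.IdentityMapper;

  public class MapOnlyDriver {
    public static void runJob(String input, String output) throws IOException {
      JobConf conf = new JobConf(MapOnlyDriver.class);
      // With the default TextInputFormat, keys are byte offsets, values are lines.
      conf.setOutputKeyClass(LongWritable.class);
      conf.setOutputValueClass(Text.class);
      FileInputFormat.setInputPaths(conf, new Path(input));
      FileOutputFormat.setOutputPath(conf, new Path(output));
      conf.setOutputFormat(SequenceFileOutputFormat.class);
      conf.setMapperClass(IdentityMapper.class);
      conf.setNumReduceTasks(0); // map-only: mapper output is written directly
      JobClient.runJob(conf);    // static call; submits and blocks until completion
    }
  }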

Examples of org.apache.hadoop.mapred.JobClient

      CommandLineUtil.printHelp(group);
    }
  }
 
  public static void runJob(String input, String output, String vectorClassName) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(InputDriver.class);
   
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(VectorWritable.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.set("vector.implementation.class.name", vectorClassName);
    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(output));
   
    conf.setMapperClass(InputMapper.class);
   
    conf.setReducerClass(Reducer.class);
    conf.setNumReduceTasks(0);
   
    client.setConf(conf);
    JobClient.runJob(conf);
  }
View Full Code Here
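
Here the driver passes a parameter to the tasks through the configuration with conf.set("vector.implementation.class.name", vectorClassName). On the old API, a mapper reads such values back in configure(JobConf), which runs once per task before any map() call. A sketch with an illustrative mapper (the body is a placeholder, not Mahout's InputMapper):

  import java.io.IOException;

  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapred.MapReduceBase;
  import org.apache.hadoop.mapred.Mapper;
  import org.apache.hadoop.mapred.OutputCollector;
  import org.apache.hadoop.mapred.Reporter;

  public class ConfAwareMapper extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, Text> {

    private String vectorClassName;

    @Override
    public void configure(JobConf job) {
      // Runs once per task; this is where job-level parameters
      // set by the driver are read back.
      vectorClassName = job.get("vector.implementation.class.name");
    }

    @Override
    public void map(LongWritable key, Text value,
                    OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      output.collect(new Text(vectorClassName), value);
    }
  }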

Examples of org.apache.hadoop.mapred.JobClient

                             String measureClass,
                             double t1,
                             double t2,
                             double convergenceDelta,
                             int maxIterations) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(Job.class);
   
    Path outPath = new Path(output);
    client.setConf(conf);
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
    String directoryContainingConvertedInput = output + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
View Full Code Here
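
This driver shows the delete-output-before-run idiom: old mapred jobs fail if the output path already exists, so the driver removes it up front. The same idiom, factored into an illustrative helper (the class and method names are placeholders, not Mahout classes):

  import java.io.IOException;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public final class OutputUtils {
    private OutputUtils() {}

    // Deletes the output path recursively if it exists, so the job can be rerun.
    public static void deleteIfExists(String output, Configuration conf) throws IOException {
      Path outPath = new Path(output);
      // Equivalent to FileSystem.get(outPath.toUri(), conf).
      FileSystem fs = outPath.getFileSystem(conf);
      if (fs.exists(outPath)) {
        fs.delete(outPath, true); // true = recursive, needed for directories
      }
    }
  }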

Examples of org.apache.hadoop.mapred.JobClient

      }
     
      if (cmdLine.hasOption(seqOpt)) {
        Path path = new Path(cmdLine.getValue(seqOpt).toString());
        System.out.println("Input Path: " + path);
        JobClient client = new JobClient();
        JobConf conf = new JobConf(Job.class);
        client.setConf(conf);
       
        FileSystem fs = FileSystem.get(path.toUri(), conf);
       
        String dictionaryType = "text";
        if (cmdLine.hasOption(dictTypeOpt)) {
View Full Code Here
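
In this snippet, JobClient and JobConf are created only to obtain a correctly configured FileSystem for the path; what typically follows is opening the SequenceFile and iterating its records. A sketch of that continuation (the key and value objects are instantiated reflectively from the types recorded in the file header):

  import java.io.IOException;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.SequenceFile;
  import org.apache.hadoop.io.Writable;
  import org.apache.hadoop.util.ReflectionUtils;

  public static void dumpSequenceFile(FileSystem fs, Path path, Configuration conf)
      throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
      Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value); // one record per line: key<TAB>value
      }
    } finally {
      reader.close();
    }
  }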

Examples of org.apache.hadoop.mapred.JobClient

                             String measureClassName,
                             double t1,
                             double t2,
                             double convergenceDelta,
                             int maxIterations) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(Job.class);
   
    Path outPath = new Path(output);
    client.setConf(conf);
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
    String directoryContainingConvertedInput = output + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
View Full Code Here

Examples of org.apache.hadoop.mapred.JobClient

      CommandLineUtil.printHelp(group);
    }
  }
 
  public static void runJob(String input, String output) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(org.apache.mahout.clustering.syntheticcontrol.meanshift.OutputDriver.class);
   
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
   
    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(output));
   
    conf.setMapperClass(OutputMapper.class);
   
    conf.setReducerClass(Reducer.class);
    conf.setNumReduceTasks(0);
   
    client.setConf(conf);
    JobClient.runJob(conf);
  }
View Full Code Here
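
This OutputDriver is the mirror image of the InputDriver in the first example: it reads the clustering job's SequenceFile output (setInputFormat(SequenceFileInputFormat.class)) and writes plain text. Hypothetical glue code chaining the two conversions around a clustering run (the paths are placeholders):

  public static void main(String[] args) throws IOException {
    // Text input -> MeanShiftCanopy SequenceFiles.
    org.apache.mahout.clustering.syntheticcontrol.meanshift.InputDriver
        .runJob("testdata", "output/data");
    // ... the mean-shift iterations would run here ...
    // Cluster SequenceFiles -> plain text.
    org.apache.mahout.clustering.syntheticcontrol.meanshift.OutputDriver
        .runJob("output/clusters", "output/clusteredPoints");
  }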

Examples of org.apache.hadoop.mapred.JobClient

   * @throws ClassNotFoundException
   */
  @Override
  public void runJob(String input, String output, BayesParameters params) throws IOException {
   
    Configurable client = new JobClient();
    JobConf conf = new JobConf(BayesTfIdfDriver.class);
    conf.setJobName("TfIdf Driver running over input: " + input);
   
    conf.setOutputKeyClass(StringTuple.class);
    conf.setOutputValueClass(DoubleWritable.class);
   
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-termDocCount"));
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-wordFreq"));
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-featureCount"));
    Path outPath = new Path(output + "/trainer-tfIdf/");
    FileOutputFormat.setOutputPath(conf, outPath);
   
    // conf.setNumMapTasks(100);
   
    conf.setJarByClass(BayesTfIdfDriver.class);
   
    conf.setMapperClass(BayesTfIdfMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(BayesTfIdfReducer.class);
   
    conf.setReducerClass(BayesTfIdfReducer.class);
   
    conf.setOutputFormat(BayesTfIdfOutputFormat.class);
   
    conf.set("io.serializations",
        "org.apache.hadoop.io.serializer.JavaSerialization,"
        + "org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this: Hadoop conf parameters like this one can
    // make or break a piece of code.
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
   
    Path interimFile = new Path(output + "/trainer-docCount/part-*");
   
    Map<String,Double> labelDocumentCounts = SequenceFileModelReader.readLabelDocumentCounts(dfs,
      interimFile, conf);
   
    DefaultStringifier<Map<String,Double>> mapStringifier = new DefaultStringifier<Map<String,Double>>(conf,
        GenericsUtil.getClass(labelDocumentCounts));
   
    String labelDocumentCountString = mapStringifier.toString(labelDocumentCounts);
    log.info("Counts of documents in Each Label");
    Map<String,Double> c = mapStringifier.fromString(labelDocumentCountString);
    log.info("{}", c);
   
    conf.set("cnaivebayes.labelDocumentCounts", labelDocumentCountString);
    log.info(params.print());
    if (params.get("dataSource").equals("hbase")) {
      HBaseConfiguration hc = new HBaseConfiguration(new Configuration());
      HTableDescriptor ht = new HTableDescriptor(output);
      HColumnDescriptor hcd = new HColumnDescriptor(BayesConstants.HBASE_COLUMN_FAMILY + ':');
      hcd.setBloomfilter(true);
      hcd.setInMemory(true);
      hcd.setMaxVersions(1);
      hcd.setBlockCacheEnabled(true);
      ht.addFamily(hcd);
     
      log.info("Connecting to hbase...");
      HBaseAdmin hba = new HBaseAdmin(hc);
      log.info("Creating Table {}", output);
     
      if (hba.tableExists(output)) {
        hba.disableTable(output);
        hba.deleteTable(output);
        hba.majorCompact(".META.");
      }
      hba.createTable(ht);
      conf.set("output.table", output);
    }
    conf.set("bayes.parameters", params.toString());
   
    client.setConf(conf);
   
    JobClient.runJob(conf);
  }
View Full Code Here
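
The notable trick in this driver is DefaultStringifier: a JobConf can only hold strings, so the label-to-count map is serialized into the configuration and deserialized inside the tasks, which is exactly why io.serializations must include JavaSerialization. A minimal round-trip sketch of the idiom (the key "demo.counts" and the sample values are illustrative):

  import java.io.IOException;
  import java.util.HashMap;
  import java.util.Map;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.io.DefaultStringifier;
  import org.apache.hadoop.util.GenericsUtil;

  public class StringifierDemo {
    public static void main(String[] args) throws IOException {
      Configuration conf = new Configuration();
      conf.set("io.serializations",
          "org.apache.hadoop.io.serializer.JavaSerialization,"
          + "org.apache.hadoop.io.serializer.WritableSerialization");

      Map<String, Double> counts = new HashMap<String, Double>();
      counts.put("label1", 42.0);

      DefaultStringifier<Map<String, Double>> stringifier =
          new DefaultStringifier<Map<String, Double>>(conf, GenericsUtil.getClass(counts));

      conf.set("demo.counts", stringifier.toString(counts));      // driver side
      Map<String, Double> restored =
          stringifier.fromString(conf.get("demo.counts"));        // task side
      System.out.println(restored);
    }
  }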

Examples of org.apache.hadoop.mapred.JobClient

   * @param t2
   *          the canopy T2 threshold
   */
  private static void runJob(String input, String output, String measureClassName,
                             double t1, double t2) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(Job.class);
   
    Path outPath = new Path(output);
    client.setConf(conf);
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
    String directoryContainingConvertedInput = output + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
View Full Code Here

Examples of org.apache.hadoop.mapred.JobClient

   * @param output
   *          the output pathname String
   */
  @Override
  public void runJob(String input, String output, BayesParameters params) throws IOException {
    Configurable client = new JobClient();
    JobConf conf = new JobConf(BayesWeightSummerDriver.class);
    conf.setJobName("Bayes Weight Summer Driver running over input: " + input);
   
    conf.setOutputKeyClass(StringTuple.class);
    conf.setOutputValueClass(DoubleWritable.class);
   
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-tfIdf/trainer-tfIdf"));
    Path outPath = new Path(output + "/trainer-weights");
    FileOutputFormat.setOutputPath(conf, outPath);
    // conf.setNumReduceTasks(1);
    // conf.setNumMapTasks(100);
    conf.setMapperClass(BayesWeightSummerMapper.class);
    // See the javadoc for the file input format spec: the first token is the
    // key and the rest of the line is the value; each document is on one line.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(BayesWeightSummerReducer.class);
    conf.setReducerClass(BayesWeightSummerReducer.class);
    conf.setOutputFormat(BayesWeightSummerOutputFormat.class);
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
    conf.set("bayes.parameters", params.toString());
   
    conf.set("output.table", output);
   
    client.setConf(conf);
   
    JobClient.runJob(conf);
  }
View Full Code Here
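
Note how this driver registers the same class as both combiner and reducer (setCombinerClass and setReducerClass both get BayesWeightSummerReducer). That is only safe when the reduce function is associative and commutative, such as a sum, because combiner output is fed back through the reducer. An illustrative reducer of that shape (not the Mahout class, which keys on StringTuple):

  import java.io.IOException;
  import java.util.Iterator;

  import org.apache.hadoop.io.DoubleWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.MapReduceBase;
  import org.apache.hadoop.mapred.OutputCollector;
  import org.apache.hadoop.mapred.Reducer;
  import org.apache.hadoop.mapred.Reporter;

  public class SumReducer extends MapReduceBase
      implements Reducer<Text, DoubleWritable, Text, DoubleWritable> {
    @Override
    public void reduce(Text key, Iterator<DoubleWritable> values,
                       OutputCollector<Text, DoubleWritable> output, Reporter reporter)
        throws IOException {
      double sum = 0.0;
      while (values.hasNext()) {
        sum += values.next().get(); // partial sums from combiners fold in correctly
      }
      output.collect(key, new DoubleWritable(sum));
    }
  }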

Examples of org.apache.hadoop.mapred.JobClient

   *
   * @param params
   *          The Job parameters containing the gramSize, input output folders, defaultCat, encoding
   */
  public static void runJob(Parameters params) throws IOException {
    Configurable client = new JobClient();
    JobConf conf = new JobConf(BayesClassifierDriver.class);
    conf.setJobName("Bayes Classifier Driver running over input: " + params.get("testDirPath"));
    conf.setOutputKeyClass(StringTuple.class);
    conf.setOutputValueClass(DoubleWritable.class);
   
    FileInputFormat.setInputPaths(conf, new Path(params.get("testDirPath")));
    Path outPath = new Path(params.get("testDirPath") + "-output");
    FileOutputFormat.setOutputPath(conf, outPath);
   
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setMapperClass(BayesClassifierMapper.class);
    conf.setCombinerClass(BayesClassifierReducer.class);
    conf.setReducerClass(BayesClassifierReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
   
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                                  + "org.apache.hadoop.io.serializer.WritableSerialization");
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
    conf.set("bayes.parameters", params.toString());
   
    client.setConf(conf);
    JobClient.runJob(conf);
   
    Path outputFiles = new Path(outPath.toString() + "/part*");
    ConfusionMatrix matrix = readResult(dfs, outputFiles, conf, params);
    log.info("{}", matrix.summarize());
View Full Code Here
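
Here the result is read back from the output files, but JobClient.runJob also returns a RunningJob handle that can be used to check success and read counters. A sketch of that alternative (the counter group and name strings are the stock Hadoop task counters, not something from the snippet above):

  import java.io.IOException;

  import org.apache.hadoop.mapred.JobClient;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapred.RunningJob;

  public static void runAndCheck(JobConf conf) throws IOException {
    RunningJob job = JobClient.runJob(conf); // blocks until the job completes
    if (!job.isSuccessful()) {
      throw new IllegalStateException("Job failed: " + job.getJobName());
    }
    long reduceOut = job.getCounters()
        .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS")
        .getCounter();
    System.out.println("Reduce output records: " + reduceOut);
  }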