Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.Job


      checkMandatoryConfs();
    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
      job.setMapperClass(DedupKeyMapper.class);
      job.setReducerClass(DedupKeyReducer.class);
      job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
      job.setMapperClass(DedupValueMapper.class);
      job.setReducerClass(DedupValueReducer.class);
      job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat
        .equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
      DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }

    job.setMapOutputKeyClass(HihoTuple.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
      logger.debug("Output format class is " + job.getOutputFormatClass());
      logger.debug("Class is "
          + ReflectionUtils
              .newInstance(job.getOutputFormatClass(),
                  job.getConfiguration()).getClass()
              .getName());
      job.waitForCompletion(false);
      if (job.isComplete()) {
        Counters counters = job.getCounters();
        totalRecordsRead = counters.findCounter(
            DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
        badRecords = counters.findCounter(
            DedupRecordCounter.BAD_RECORD).getValue();
        output = counters.findCounter(DedupRecordCounter.OUTPUT)
View Full Code Here


        io.lumify.core.config.Configuration lumifyConfig = ConfigurationLoader.load();
        Configuration conf = getConfiguration(args, lumifyConfig);
        AccumuloGraphConfiguration accumuloGraphConfiguration = new AccumuloGraphConfiguration(conf, "graph.");
//        InjectHelper.inject(this, LumifyBootstrap.bootstrapModuleMaker(lumifyConfig));

        Job job = new Job(conf, "GDELTImport");

        String instanceName = accumuloGraphConfiguration.getAccumuloInstanceName();
        String zooKeepers = accumuloGraphConfiguration.getZookeeperServers();
        String principal = accumuloGraphConfiguration.getAccumuloUsername();
        AuthenticationToken authorizationToken = accumuloGraphConfiguration.getAuthenticationToken();
        AccumuloElementOutputFormat.setOutputInfo(job, instanceName, zooKeepers, principal, authorizationToken);

        job.setJarByClass(GDELTRunner.class);
        job.setMapperClass(GDELTMapper.class);
        job.setMapOutputValueClass(Mutation.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(AccumuloElementOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(conf.get("in")));

        int returnCode = job.waitForCompletion(true) ? 0 : 1;

        CounterGroup groupCounters = job.getCounters().getGroup(GDELTImportCounters.class.getName());
        for (Counter counter : groupCounters) {
            System.out.println(counter.getDisplayName() + ": " + counter.getValue());
        }

        return returnCode;
View Full Code Here

        InjectHelper.inject(this, LumifyBootstrap.bootstrapModuleMaker(lumifyConfig));

        verifyFriendsterUserConcept(ontologyRepository);
        verifyFriendsterUserToUserRelationship(ontologyRepository);

        Job job = new Job(conf, "friendsterImport");

        String instanceName = accumuloGraphConfiguration.getAccumuloInstanceName();
        String zooKeepers = accumuloGraphConfiguration.getZookeeperServers();
        String principal = accumuloGraphConfiguration.getAccumuloUsername();
        AuthenticationToken authorizationToken = accumuloGraphConfiguration.getAuthenticationToken();
        AccumuloElementOutputFormat.setOutputInfo(job, instanceName, zooKeepers, principal, authorizationToken);

        List<Text> splits = getSplits((AccumuloGraph) graph);
        Path splitFile = writeSplitsFile(conf, splits);

        if (job.getConfiguration().get("mapred.job.tracker").equals("local")) {
            LOGGER.warn("!!!!!! Running in local mode !!!!!!");
        } else {
            job.setPartitionerClass(RangePartitioner.class);
            RangePartitioner.setSplitFile(job, splitFile.toString());
            job.setNumReduceTasks(splits.size() + 1);
        }

        job.setJarByClass(ImportMR.class);
        job.setMapperClass(ImportMRMapper.class);
        job.setMapOutputValueClass(Mutation.class);
        job.setReducerClass(ImportMRReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(AccumuloElementOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(conf.get("in")));

        int returnCode = job.waitForCompletion(true) ? 0 : 1;

        CounterGroup groupCounters = job.getCounters().getGroup(FriendsterImportCounters.class.getName());
        for (Counter counter : groupCounters) {
            System.out.println(counter.getDisplayName() + ": " + counter.getValue());
        }

        return returnCode;
View Full Code Here

        io.lumify.core.config.Configuration lumifyConfig = ConfigurationLoader.load();
        JobConf conf = getConfiguration(args, lumifyConfig);
        AccumuloGraphConfiguration accumuloGraphConfiguration = new AccumuloGraphConfiguration(conf, "graph.");
        InjectHelper.inject(this, LumifyBootstrap.bootstrapModuleMaker(lumifyConfig));

        Job job = new Job(conf, getJobName());

        instanceName = accumuloGraphConfiguration.getAccumuloInstanceName();
        zooKeepers = accumuloGraphConfiguration.getZookeeperServers();
        principal = accumuloGraphConfiguration.getAccumuloUsername();
        authorizationToken = accumuloGraphConfiguration.getAuthenticationToken();
        AccumuloElementOutputFormat.setOutputInfo(job, instanceName, zooKeepers, principal, authorizationToken);

        boolean periodicCounterOutput = conf.getBoolean("lumify.periodic.counter.output.enabled", false);

        if (job.getConfiguration().get("mapred.job.tracker").equals("local")) {
            LOGGER.warn("!!!!!! Running in local mode !!!!!!");
            local = true;
            periodicCounterOutput = true;
        }

        setupJob(job);

        if (periodicCounterOutput) {
            startPeriodicCounterOutputThread(job);
        }

        LOGGER.info("Starting job");
        long startTime = System.currentTimeMillis();
        int result = job.waitForCompletion(true) ? 0 : 1;
        long endTime = System.currentTimeMillis();
        LOGGER.info("Job complete");

        if (periodicCounterOutputTimer != null) {
            periodicCounterOutputTimer.cancel();
View Full Code Here

            } else if (mJobs.isEmpty()) {
              break;
            }
          }
          while (!mJobs.isEmpty()) {
            Job job;
            synchronized (mJobs) {
              job = mJobs.poll();
            }
            try {
              if (job.isComplete()) {
                process(job);
                statistics.add(job);
                continue;
              }
            } catch (IOException e) {
              if (e.getCause() instanceof ClosedByInterruptException ||
                  e.getCause() instanceof InterruptedException) {
                // Job doesn't throw InterruptedException, but RPC socket layer
                // is blocking and may throw a wrapped Exception if this thread
                // is interrupted. Since the lower level cleared the flag,
                // reset it here
                Thread.currentThread().interrupt();
              } else {
                LOG.warn("Lost job " + (null == job.getJobName()
                     ? "<unknown>" : job.getJobName()), e);
                continue;
              }
            }
            synchronized (mJobs) {
              if (!mJobs.offer(job)) {
                LOG.error("Lost job " + (null == job.getJobName()
                     ? "<unknown>" : job.getJobName())); // should never
                                                         // happen
              }
            }
            break;
          }
View Full Code Here

   * @throws Exception If an error occurs creating job configuration.
   */
  public static Job createCopyJob(Configuration conf, Path outdir,
      Path... indirs) throws Exception {
    conf.setInt("mapred.map.tasks", 3);
    Job theJob = new Job(conf);
    theJob.setJobName("DataMoveJob");

    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(DataCopyMapper.class);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    theJob.setReducerClass(DataCopyReducer.class);
    theJob.setNumReduceTasks(1);
    return theJob;
  }
View Full Code Here

    FileSystem fs = outdir.getFileSystem(conf);
    if (fs.exists(outdir)) {
      fs.delete(outdir, true);
    }
    conf.setInt("mapred.map.max.attempts", 2);
    Job theJob = new Job(conf);
    theJob.setJobName("Fail-Job");

    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(FailMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
  }
View Full Code Here

   * @throws Exception If an error occurs creating job configuration.
   */
  public static Job createKillJob(Configuration conf, Path outdir,
      Path... indirs) throws Exception {

    Job theJob = new Job(conf);
    theJob.setJobName("Kill-Job");

    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(KillMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
  }
View Full Code Here

    return createJob(conf, inDir, outDir, numInputFiles, numReds, input);
  }

  public static Job createJob(Configuration conf, Path inDir, Path outDir,
      int numInputFiles, int numReds, String input) throws IOException {
    Job job = new Job(conf);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
      fs.delete(outDir, true);
    }
    if (fs.exists(inDir)) {
      fs.delete(inDir, true);
    }
    fs.mkdirs(inDir);
    for (int i = 0; i < numInputFiles; ++i) {
      DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
      file.writeBytes(input);
      file.close();
    }   

    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    job.setNumReduceTasks(numReds);
    return job;
  }
View Full Code Here

  private void failJob(Configuration conf,
      Class<? extends OutputCommitter> theClass, String confName,
      Class<? extends Mapper> mapClass, Class<? extends Reducer> redClass,
      boolean isUserKill)
      throws Exception {
    Job job = new Job(conf, confName);
    job.setJarByClass(JobKillCommitter.class);
    job.setMapperClass(mapClass);
    job.setCombinerClass(redClass);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(redClass);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    JobConf jconf = new JobConf(job.getConfiguration(), JobKillCommitter.class);
    jconf.setOutputCommitter(theClass);
    if(!isUserKill)
    { 
      RunningJob rJob = cluster.getJTClient().getClient().submitJob(jconf);
      JobID id = rJob.getID();
View Full Code Here

TOP

Related Classes of org.apache.hadoop.mapreduce.Job

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.