Examples of PartitionKeySampler

org.apache.hadoop.hive.ql.exec.PartitionKeySampler

Examples of org.apache.hadoop.hive.ql.exec.PartitionKeySampler

    String tmpPath = context.getCtx().getExternalTmpFileURI(onePath.toUri());


    Path partitionFile = new Path(tmpPath, ".partitions");
    ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);


    PartitionKeySampler sampler = new PartitionKeySampler();


    if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
      console.printInfo("Use sampling data created in previous MR");
      // merges sampling data from previous MR and make paritition keys for total sort
      for (String path : paths) {
        Path inputPath = new Path(path);
        FileSystem fs = inputPath.getFileSystem(job);
        for (FileStatus status : fs.globStatus(new Path(inputPath, ".sampling*"))) {
          sampler.addSampleFile(status.getPath(), job);
        }
      }
    } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
      console.printInfo("Creating sampling data..");
      assert topOp instanceof TableScanOperator;
      TableScanOperator ts = (TableScanOperator) topOp;


      FetchWork fetchWork;
      if (!partDesc.isPartitioned()) {
        assert paths.size() == 1;
        fetchWork = new FetchWork(paths.get(0), partDesc.getTableDesc());
      } else {
        fetchWork = new FetchWork(paths, parts, partDesc.getTableDesc());
      }
      fetchWork.setSource(ts);


      // random sampling
      FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);
      try {
        ts.initialize(conf, new ObjectInspector[]{fetcher.getOutputObjectInspector()});
        ts.setOutputCollector(sampler);
        while (fetcher.pushRow()) { }
      } finally {
        fetcher.clearFetchContext();
      }
    } else {
      throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
    }
    sampler.writePartitionKeys(partitionFile, job);
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.PartitionKeySampler

    }


    Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0).toUri());
    Path partitionFile = new Path(tmpPath, ".partitions");
    ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
    PartitionKeySampler sampler = new PartitionKeySampler();


    if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
      console.printInfo("Use sampling data created in previous MR");
      // merges sampling data from previous MR and make partition keys for total sort
      for (Path path : inputPaths) {
        FileSystem fs = path.getFileSystem(job);
        for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) {
          sampler.addSampleFile(status.getPath(), job);
        }
      }
    } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
      console.printInfo("Creating sampling data..");
      assert topOp instanceof TableScanOperator;
      TableScanOperator ts = (TableScanOperator) topOp;


      FetchWork fetchWork;
      if (!partDesc.isPartitioned()) {
        assert paths.size() == 1;
        fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
      } else {
        fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
      }
      fetchWork.setSource(ts);


      // random sampling
      FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);
      try {
        ts.initialize(conf, new ObjectInspector[]{fetcher.getOutputObjectInspector()});
        OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
        while (fetcher.pushRow()) { }
      } finally {
        fetcher.clearFetchContext();
      }
    } else {
      throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
    }
    sampler.writePartitionKeys(partitionFile, job);
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.PartitionKeySampler

    }


    Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0));
    Path partitionFile = new Path(tmpPath, ".partitions");
    ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
    PartitionKeySampler sampler = new PartitionKeySampler();


    if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
      console.printInfo("Use sampling data created in previous MR");
      // merges sampling data from previous MR and make partition keys for total sort
      for (Path path : inputPaths) {
        FileSystem fs = path.getFileSystem(job);
        for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) {
          sampler.addSampleFile(status.getPath(), job);
        }
      }
    } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
      console.printInfo("Creating sampling data..");
      assert topOp instanceof TableScanOperator;
      TableScanOperator ts = (TableScanOperator) topOp;


      FetchWork fetchWork;
      if (!partDesc.isPartitioned()) {
        assert paths.size() == 1;
        fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
      } else {
        fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
      }
      fetchWork.setSource(ts);


      // random sampling
      FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);
      try {
        ts.initialize(conf, new ObjectInspector[]{fetcher.getOutputObjectInspector()});
        OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
        while (fetcher.pushRow()) { }
      } finally {
        fetcher.clearFetchContext();
      }
    } else {
      throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
    }
    sampler.writePartitionKeys(partitionFile, conf, job);
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.PartitionKeySampler

    }


    Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0).toUri());
    Path partitionFile = new Path(tmpPath, ".partitions");
    ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
    PartitionKeySampler sampler = new PartitionKeySampler();


    if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
      console.printInfo("Use sampling data created in previous MR");
      // merges sampling data from previous MR and make partition keys for total sort
      for (Path path : inputPaths) {
        FileSystem fs = path.getFileSystem(job);
        for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) {
          sampler.addSampleFile(status.getPath(), job);
        }
      }
    } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
      console.printInfo("Creating sampling data..");
      assert topOp instanceof TableScanOperator;
      TableScanOperator ts = (TableScanOperator) topOp;


      FetchWork fetchWork;
      if (!partDesc.isPartitioned()) {
        assert paths.size() == 1;
        fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
      } else {
        fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
      }
      fetchWork.setSource(ts);


      // random sampling
      FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);
      try {
        ts.initialize(conf, new ObjectInspector[]{fetcher.getOutputObjectInspector()});
        OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
        while (fetcher.pushRow()) { }
      } finally {
        fetcher.clearFetchContext();
      }
    } else {
      throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
    }
    sampler.writePartitionKeys(partitionFile, job);
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.PartitionKeySampler

    String tmpPath = context.getCtx().getExternalTmpFileURI(onePath.toUri());


    Path partitionFile = new Path(tmpPath, ".partitions");
    ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);


    PartitionKeySampler sampler = new PartitionKeySampler();


    if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
      console.printInfo("Use sampling data created in previous MR");
      // merges sampling data from previous MR and make paritition keys for total sort
      for (String path : paths) {
        Path inputPath = new Path(path);
        FileSystem fs = inputPath.getFileSystem(job);
        for (FileStatus status : fs.globStatus(new Path(inputPath, ".sampling*"))) {
          sampler.addSampleFile(status.getPath(), job);
        }
      }
    } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
      console.printInfo("Creating sampling data..");
      assert topOp instanceof TableScanOperator;
      TableScanOperator ts = (TableScanOperator) topOp;


      FetchWork fetchWork;
      if (!partDesc.isPartitioned()) {
        assert paths.size() == 1;
        fetchWork = new FetchWork(paths.get(0), partDesc.getTableDesc());
      } else {
        fetchWork = new FetchWork(paths, parts, partDesc.getTableDesc());
      }
      fetchWork.setSource(ts);


      // random sampling
      FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);
      try {
        ts.initialize(conf, new ObjectInspector[]{fetcher.getOutputObjectInspector()});
        ts.setOutputCollector(sampler);
        while (fetcher.pushRow()) { }
      } finally {
        fetcher.clearFetchContext();
      }
    } else {
      throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
    }
    sampler.writePartitionKeys(partitionFile, job);
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.PartitionKeySampler

    }


    Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0).toUri());
    Path partitionFile = new Path(tmpPath, ".partitions");
    ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
    PartitionKeySampler sampler = new PartitionKeySampler();


    if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
      console.printInfo("Use sampling data created in previous MR");
      // merges sampling data from previous MR and make partition keys for total sort
      for (Path path : inputPaths) {
        FileSystem fs = path.getFileSystem(job);
        for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) {
          sampler.addSampleFile(status.getPath(), job);
        }
      }
    } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
      console.printInfo("Creating sampling data..");
      assert topOp instanceof TableScanOperator;
      TableScanOperator ts = (TableScanOperator) topOp;


      FetchWork fetchWork;
      if (!partDesc.isPartitioned()) {
        assert paths.size() == 1;
        fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
      } else {
        fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
      }
      fetchWork.setSource(ts);


      // random sampling
      FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);
      try {
        ts.initialize(conf, new ObjectInspector[]{fetcher.getOutputObjectInspector()});
        OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
        while (fetcher.pushRow()) { }
      } finally {
        fetcher.clearFetchContext();
      }
    } else {
      throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
    }
    sampler.writePartitionKeys(partitionFile, job);
  }

View Full Code Here

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.