Package org.apache.mahout.df.data

Examples of org.apache.mahout.df.data.DataConverter


   *          number of running map tasks
   * @param numTrees
   *          total number of trees in the forest
   */
  protected void configure(Long seed, int partition, int numMapTasks, int numTrees) {
    converter = new DataConverter(getDataset());
   
    // prepare the random-number generator
    log.debug("seed : {}", seed);
    if (seed == null) {
      rng = RandomUtils.getRandom();
View Full Code Here


   
    /**
     * Useful when testing
     */
    protected void setup(Dataset dataset) {
      converter = new DataConverter(dataset);
      //log.info("labels: {}", Arrays.toString(dataset.labels()));
    }
View Full Code Here

    this.partition = partition;
    if (partition < 0) {
      throw new IllegalArgumentException("Wrong partition id : " + partition);
    }
   
    converter = new DataConverter(dataset);
   
    if (keys.length != trees.length) {
      throw new IllegalArgumentException("keys.length != trees.length");
    }
    int nbConcerned = keys.length;
View Full Code Here

    // foreach tuple of the data
    Path dataPath = new Path(dataStr);
    FileSystem ifs = dataPath.getFileSystem(conf);
    FSDataInputStream input = ifs.open(dataPath);
    Scanner scanner = new Scanner(input);
    DataConverter converter = new DataConverter(dataset);
    int nbInstances = dataset.nbInstances();
   
    int id = 0;
    while (scanner.hasNextLine()) {
      if (id % 1000 == 0) {
        log.info(String.format("progress : %d / %d", id, nbInstances));
      }
     
      String line = scanner.nextLine();
      if (line.isEmpty()) {
        continue; // skip empty lines
      }
     
      // write the tuple in files[tuple.label]
      Instance instance = converter.convert(id++, line);
      int label = instance.label;
      files[currents[label]].writeBytes(line);
      files[currents[label]].writeChar('\n');
     
      // update currents
View Full Code Here

   *          number of running map tasks
   * @param numTrees
   *          total number of trees in the forest
   */
  protected void configure(Long seed, int partition, int numMapTasks, int numTrees) {
    converter = new DataConverter(getDataset());
   
    // prepare the random-number generator
    log.debug("seed : {}", seed);
    if (seed == null) {
      rng = RandomUtils.getRandom();
View Full Code Here

    this.partition = partition;
    if (partition < 0) {
      throw new IllegalArgumentException("Wrong partition id : " + partition);
    }
   
    converter = new DataConverter(dataset);
   
    if (keys.length != trees.length) {
      throw new IllegalArgumentException("keys.length != trees.length");
    }
    int nbConcerned = keys.length;
View Full Code Here

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);
   
    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);
   
    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);
    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Reporter reporter = Reporter.NULL;

    int[] keys = new int[numMaps];
    Step0Output[] values = new Step0Output[numMaps];
   
    int[] expectedIds = new int[numMaps];
   
    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

      LongWritable key = reader.createKey();
      Text value = reader.createValue();

      Long firstKey = null;
      int size = 0;
     
      while (reader.next(key, value)) {
        if (firstKey == null) {
          firstKey = key.get();
          expectedIds[p] = converter.convert(0, value.toString()).label;
        }

        size++;
      }
     
View Full Code Here

      return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    DataConverter converter = new DataConverter(dataset);

    log.info("Sequential classification...");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();
View Full Code Here

   */
  public void configure(int partition, Dataset dataset, TreeID[] keys, Node[] trees, int numInstances) {
    this.partition = partition;
    Preconditions.checkArgument(partition >= 0, "Wrong partition id : " + partition);
   
    converter = new DataConverter(dataset);

    Preconditions.checkArgument(keys.length == trees.length, "keys.length != trees.length");
    int nbConcerned = keys.length;
   
    this.keys = keys;
View Full Code Here

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);

    Job job = new Job();
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dataPath);

    setMaxSplitSize(job.getConfiguration(), dataPath, NUM_MAPS);

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(NUM_MAPS, splits.size());

    InputSplit[] sorted = new InputSplit[NUM_MAPS];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    List<Integer> keys = new ArrayList<Integer>();
    List<Step0Output> values = new ArrayList<Step0Output>();

    int[] expectedIds = new int[NUM_MAPS];

    TaskAttemptContext context = new TaskAttemptContext(job.getConfiguration(),
        new TaskAttemptID());

    for (int p = 0; p < NUM_MAPS; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split,
          context);
      reader.initialize(split, context);

      Long firstKey = null;
      int size = 0;

      while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        Text value = reader.getCurrentValue();

        if (firstKey == null) {
          firstKey = key.get();
          expectedIds[p] = converter.convert(0, value.toString()).getLabel();
        }

        size++;
      }
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.data.DataConverter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.