Package: org.apache.mahout.df.data

Examples of org.apache.mahout.df.data.DataConverter


    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);

    Job job = new Job();
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dataPath);

    setMaxSplitSize(job.getConfiguration(), dataPath, numMaps);

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(numMaps, splits.size());

    InputSplit[] sorted = new InputSplit[numMaps];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    List<Integer> keys = new ArrayList<Integer>();
    List<Step0Output> values = new ArrayList<Step0Output>();

    int[] expectedIds = new int[numMaps];

    TaskAttemptContext context = new TaskAttemptContext(job.getConfiguration(),
        new TaskAttemptID());

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split,
          context);
      reader.initialize(split, context);

      Long firstKey = null;
      int size = 0;

      while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        Text value = reader.getCurrentValue();

        if (firstKey == null) {
          firstKey = key.get();
          expectedIds[p] = converter.convert(0, value.toString()).label;
        }

        size++;
      }
View Full Code Here


   */
  public void configure(int partition, Dataset dataset, TreeID[] keys, Node[] trees, int numInstances) {
    this.partition = partition;
    Preconditions.checkArgument(partition >= 0, "Wrong partition id : " + partition);
   
    converter = new DataConverter(dataset);

    Preconditions.checkArgument(keys.length == trees.length, "keys.length != trees.length");
    int nbConcerned = keys.length;
   
    this.keys = keys;
View Full Code Here

      return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    DataConverter converter = new DataConverter(dataset);

    log.info("Sequential classification...");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();
View Full Code Here

        throw new IOException("not enough paths in the DistributedCache");
      }

      Dataset dataset = Dataset.load(conf, new Path(files[0].getPath()));

      converter = new DataConverter(dataset);

      forest = DecisionForest.load(conf, new Path(files[1].getPath()));
      if (forest == null) {
        throw new InterruptedException("DecisionForest not found!");
      }
View Full Code Here

   
    /**
     * Test-only hook: installs a {@link DataConverter} for the given dataset
     * directly, bypassing whatever normal initialization path populates
     * {@code converter} in production (presumably the Hadoop
     * {@code setup(Context)} lifecycle — TODO confirm against the enclosing
     * class, which is not visible here).
     *
     * @param dataset the dataset whose descriptor the converter will use to
     *                parse instances
     */
    protected void setup(Dataset dataset) {
      converter = new DataConverter(dataset);
    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.data.DataConverter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., which is owned by Oracle Inc. Contact: coftware@gmail.com.