Package: org.apache.mahout.df.data

Examples of org.apache.mahout.df.data.DataConverter


    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);

    Job job = new Job();
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dataPath);

    setMaxSplitSize(job.getConfiguration(), dataPath, numMaps);

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(numMaps, splits.size());

    InputSplit[] sorted = new InputSplit[numMaps];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    List<Integer> keys = new ArrayList<Integer>();
    List<Step0Output> values = new ArrayList<Step0Output>();

    int[] expectedIds = new int[numMaps];

    TaskAttemptContext context = new TaskAttemptContext(job.getConfiguration(),
        new TaskAttemptID());

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split,
          context);
      reader.initialize(split, context);

      Long firstKey = null;
      int size = 0;

      while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        Text value = reader.getCurrentValue();

        if (firstKey == null) {
          firstKey = key.get();
          expectedIds[p] = converter.convert(0, value.toString()).label;
        }

        size++;
      }
View Full Code Here


   */
  public void configure(int partition, Dataset dataset, TreeID[] keys, Node[] trees, int numInstances) {
    this.partition = partition;
    Preconditions.checkArgument(partition >= 0, "Wrong partition id : " + partition);
   
    converter = new DataConverter(dataset);

    Preconditions.checkArgument(keys.length == trees.length, "keys.length != trees.length");
    int nbConcerned = keys.length;
   
    this.keys = keys;
View Full Code Here

      return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    DataConverter converter = new DataConverter(dataset);

    log.info("Sequential classification...");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();
View Full Code Here

        throw new IOException("not enough paths in the DistributedCache");
      }

      Dataset dataset = Dataset.load(conf, new Path(files[0].getPath()));

      converter = new DataConverter(dataset);

      forest = DecisionForest.load(conf, new Path(files[1].getPath()));
      if (forest == null) {
        throw new InterruptedException("DecisionForest not found!");
      }
View Full Code Here

   
    /**
     * Test-only hook: installs a {@link DataConverter} for the given dataset
     * directly, bypassing whatever normal initialization path populates
     * {@code converter} in production (presumably the Hadoop
     * {@code setup(Context)} lifecycle — TODO confirm against the enclosing
     * class, which is not visible here).
     *
     * @param dataset the dataset whose descriptor the converter will use to
     *                parse instances
     */
    protected void setup(Dataset dataset) {
      converter = new DataConverter(dataset);
    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.data.DataConverter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., which is owned by Oracle Inc. Contact: coftware@gmail.com.