Package org.apache.mahout.df.data

Examples of org.apache.mahout.df.data.Dataset


    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);
   
    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);
   
    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);
    FileInputFormat.setInputPaths(job, dataPath);
View Full Code Here


    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    // prepare first step output
    TreeID[] keys = new TreeID[nbTrees];
    Node[] trees = new Node[nbTrees];
View Full Code Here

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    MockTreeBuilder treeBuilder = new MockTreeBuilder();

    LongWritable key = new LongWritable();
View Full Code Here

      log.error("No Decision Forest found!");
      return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    DataConverter converter = new DataConverter(dataset);

    log.info("Sequential classification...");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();
    ResultAnalyzer analyzer = analyze ? new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown") : null;

    if (dataFS.getFileStatus(dataPath).isDir()) {
      //the input is a directory of files
      testDirectory(outputPath, converter, forest, dataset, analyzer, rng);
    else {
View Full Code Here

 
  private DecisionForest buildForest() throws IOException, ClassNotFoundException, InterruptedException {
    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);
   
    Dataset dataset = Dataset.load(getConf(), datasetPath);
   
    ForestPredictions callback = isOob ? new ForestPredictions(dataset.nbInstances(), dataset.nblabels())
        : null;
   
    Builder forestBuilder;
   
    if (isPartial) {
View Full Code Here

      return -1;
    }
   
    // load the data
    FileSystem fs = dataPath.getFileSystem(new Configuration());
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    Data data = DataLoader.loadData(dataset, fs, dataPath);
   
    // take m to be the first integer less than log2(M) + 1, where M is the
    // number of inputs
    int m = (int) Math.floor(Maths.log(2, data.getDataset().nbAttributes()) + 1);
View Full Code Here

    }

    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);
   
    Dataset dataset = Dataset.load(getConf(), datasetPath);
   
    ForestPredictions callback = isOob ? new ForestPredictions(dataset.nbInstances(), dataset.nblabels())
        : null;
   
    Builder forestBuilder;
   
    if (isPartial) {
View Full Code Here

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, NUM_ATTRIBUTES);
    double[][] source = Utils.randomDoubles(rng, descriptor, NUM_INSTANCES);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, NUM_MAPPERS);

    // prepare first step output
    TreeID[] keys = new TreeID[NUM_TREES];
    Node[] trees = new Node[NUM_TREES];
View Full Code Here

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, NUM_ATTRIBUTES);
    double[][] source = Utils.randomDoubles(rng, descriptor, NUM_INSTANCES);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, NUM_MAPPERS);

    MockTreeBuilder treeBuilder = new MockTreeBuilder();

    LongWritable key = new LongWritable();
View Full Code Here

    int label = Utils.findLabel(descriptor);

    // all the vectors have the same label (0)
    double[][] temp = Utils.randomDoublesWithSameLabel(rng, descriptor, 100, 0);
    String[] sData = Utils.double2String(temp);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);
    DefaultIgSplit iG = new DefaultIgSplit();

    double expected = 0.0 - 1.0 * Math.log(1.0) / Math.log(2.0);
    assertEquals(expected, iG.entropy(data), EPSILON);
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.data.Dataset

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.