Package com.cloudera.recordbreaker.learnstructure

Examples of com.cloudera.recordbreaker.learnstructure.LearnStructure


      Path schemaFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.SCHEMA_FILENAME);
      Path parseTreeFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.PARSER_FILENAME);
      Path jsonDataFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.JSONDATA_FILENAME);
      Path avroFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.DATA_FILENAME);

      LearnStructure ls = new LearnStructure();
      // Check to see how many records exist in the original input
      int lineCount = 0;
      BufferedReader in2 = new BufferedReader(new FileReader(inputData));
      try {
        while (in2.readLine() != null) {
          lineCount++;
        }
      } finally {
        in2.close();
      }

      // Infer structure
      ls.inferRecordFormat(localFS, new Path(inputData.getCanonicalPath()), localFS, schemaFile, parseTreeFile, jsonDataFile, avroFile, false, lineCount);

      // Test the inferred structure
      // First, load in the avro file and see how many records there are.
      int avroCount = 0;
      DataFileReader in = new DataFileReader(new File(avroFile.toString()), new GenericDatumReader());
View Full Code Here


  }

  int randId;
  void computeSchema() throws IOException {
    this.randId = new Random().nextInt();   
    LearnStructure ls = new LearnStructure();
    FileSystem fs = FSAnalyzer.getInstance().getFS();
    FileSystem localFS = FileSystem.getLocal(new Configuration());
    Path inputPath = dd.getFilename();

    File workingParserFile = File.createTempFile("textdesc", "typetree", null);
    File workingSchemaFile = File.createTempFile("textdesc", "schema", null);
   
    ls.inferRecordFormat(fs, inputPath, localFS, new Path(workingSchemaFile.getCanonicalPath()), new Path(workingParserFile.getCanonicalPath()), null, null, false, MAX_LINES);

    this.schema = Schema.parse(workingSchemaFile);
    DataInputStream in = new DataInputStream(localFS.open(new Path(workingParserFile.getCanonicalPath())));
    try {
      this.typeTree = InferredType.readType(in);
View Full Code Here

TOP

Related Classes of com.cloudera.recordbreaker.learnstructure.LearnStructure

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.