Package org.apache.mahout.common

Examples of org.apache.mahout.common.FileLineIterable


   * @throws IOException
   *             if there is an error
   */
  public static List<Vector> readFile(String fileName) throws IOException {
    List<Vector> results = new ArrayList<Vector>();
    for (String line : new FileLineIterable(new File(fileName))) {
      results.add(AbstractVector.decodeVector(line));
    }
    return results;
  }
View Full Code Here


   * @throws IOException
   *             if there is an error
   */
  public static List<Vector> readFile(String fileName) throws IOException {
    List<Vector> results = new ArrayList<Vector>();
    for (String line : new FileLineIterable(new File(fileName))) {
      results.add(AbstractVector.decodeVector(line));
    }
    return results;
  }
View Full Code Here

    Charset charset = Charset.forName("UTF-8");
    BayesFileFormatter.collapse("animal", analyzer, input, charset, new File(out, "animal"));
    files = out.listFiles();
    assertEquals("files Size: " + files.length + " is not: " + 1, 1, files.length);
    int count = 0;
    for (String line : new FileLineIterable(files[0])) {
      assertTrue("line does not start with label", line.startsWith("animal"));
      System.out.println("Line: " + line);
      count++;
    }
    assertEquals(count + " does not equal: " + words.length, count, words.length);
View Full Code Here

    File outDir = new File("output/points");
    assertTrue("output dir exists?", outDir.exists());
    String[] outFiles = outDir.list();
    assertEquals("output dir files?", 4, outFiles.length);
    DummyOutputCollector<Text, Text> collector = new DummyOutputCollector<Text, Text>();
    for (String line : new FileLineIterable(new File("output/points/part-00000"))) {
      String[] lineParts = line.split("\t");
      assertEquals("line parts", 2, lineParts.length);
      String cl = line.substring(0, line.indexOf(':'));
      collector.collect(new Text(cl), new Text(lineParts[1]));
    }
View Full Code Here

      resultFile.delete();
    }
    PrintWriter writer = null;
    try {
      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
      for (String line : new FileLineIterable(originalFile, false)) {
        String convertedLine = COLON_DELIMITER_PATTERN.matcher(line.substring(0, line.lastIndexOf(COLON_DELIMTER))).replaceAll(",");
        writer.println(convertedLine);
      }
      writer.flush();
    } catch (IOException ioe) {
View Full Code Here

    File resultFile = new File(new File(System.getProperty("java.io.tmpdir")), "taste.bookcrossing.txt");
    resultFile.delete();
    PrintWriter writer = null;
    try {
      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
      for (String line : new FileLineIterable(originalFile, true)) {
        // Delete anything that isn't numeric or a semicolon delimiter. Make comma the delimiter.
        String convertedLine = NON_DIGIT_SEMICOLON_PATTERN.matcher(line).replaceAll("").replace(';', ',');
        // If this means we deleted an entire ID -- few cases like that -- skip the line
        if (convertedLine.contains(",,")) {
          continue;
View Full Code Here

        log.info("Testing: " + file);
        String correctLabel = file.getName().split(".txt")[0];
        final TimingStatistics operationStats = new TimingStatistics();

        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()),
            Charset.forName(params.get("encoding")), false)) {

          Map<String, List<String>> document = new NGrams(line, Integer
              .parseInt(params.get("gramSize"))).generateNGrams();
          for (Map.Entry<String, List<String>> stringListEntry : document
View Full Code Here

   
    DataSet dataset = FileInfoParser.parseFile(fs, inpath);
    DataSet.initialize(dataset);

    DataLine dl = new DataLine();
    for (String line : new FileLineIterable(new File("target/test-classes/wdbc/wdbc.data"))) {
      dl.set(line);
      for (int index = 0; index < dataset.getNbAttributes(); index++) {
        if (dataset.isNumerical(index)) {
          assertInRange(dl.getAttribut(index), dataset.getMin(index), dataset
              .getMax(index));
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.FileLineIterable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.