Examples of FileLineIterable


Examples of org.apache.mahout.common.FileLineIterable

        Path unqualifiedUsersFilePath = new Path(usersFilePathString);
        FileSystem fs = FileSystem.get(unqualifiedUsersFilePath.toUri(), jobConf);
        usersToRecommendFor = new FastIDSet();
        Path usersFilePath = unqualifiedUsersFilePath.makeQualified(fs);
        in = fs.open(usersFilePath);
        for (String line : new FileLineIterable(in)) {
          usersToRecommendFor.add(Long.parseLong(line));
        }    
      } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
      } finally {
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

    File resultFile = new File(new File(System.getProperty("java.io.tmpdir")), "taste.bookcrossing.txt");
    resultFile.delete();
    PrintWriter writer = null;
    try {
      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
      for (String line : new FileLineIterable(originalFile, true)) {
        // 0 ratings are basically "no rating", ignore them (thanks h.9000)
        if (line.endsWith("\"0\"")) {
          continue;
        }
        // Delete anything that isn't numeric or a semicolon delimiter, then make comma the delimiter.
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

   
    DataSet dataset = FileInfoParser.parseFile(fs, inpath);
    DataSet.initialize(dataset);

    DataLine dl = new DataLine();
    for (String line : new FileLineIterable(new File(Resources.getResource("wdbc/wdbc.data").getPath()))) {
      dl.set(line);
      for (int index = 0; index < dataset.getNbAttributes(); index++) {
        if (dataset.isNumerical(index)) {
          CDMutationTest.assertInRange(dl.getAttribute(index), dataset.getMin(index), dataset
              .getMax(index));
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

        current.listFiles(new PrefixAdditionFilter(prefix + File.separator + current.getName(), writer,
            charset));
      } else {
        try {
          StringBuilder file = new StringBuilder();
          for (String aFit : new FileLineIterable(current, charset, false)) {
            file.append(aFit).append('\n');
          }
          writer.write(prefix + File.separator + current.getName(), file.toString());
         
        } catch (FileNotFoundException e) {
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

      itemIDPadded.insert(0, '0');
    }
    List<Preference> prefs = new ArrayList<Preference>();
    File movieFile = new File(new File(dataDirectory, "training_set"), "mv_00" + itemIDPadded + ".txt");
    try {
      for (String line : new FileLineIterable(movieFile, true)) {
        int firstComma = line.indexOf(',');
        Integer userID = Integer.valueOf(line.substring(0, firstComma));
        int secondComma = line.indexOf(',', firstComma + 1);
        float rating = Float.parseFloat(line.substring(firstComma + 1, secondComma));
        prefs.add(new GenericPreference(userID, itemID, rating));
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

   
    Map<String, List<String>> byUserEntryCache = new FastMap<String, List<String>>(100000);
   
    for (File byItemFile : byItemDirectory.listFiles()) {
      log.info("Processing {}", byItemFile);
      Iterator<String> lineIterator = new FileLineIterable(byItemFile, false).iterator();
      String line = lineIterator.next();
      String movieIDString = line.substring(0, line.length() - 1);
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        int firstComma = line.indexOf(',');
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

      resultFile.delete();
    }
    PrintWriter writer = null;
    try {
      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
      for (String line : new FileLineIterable(originalFile, false)) {
        int lastDelimiterStart = line.lastIndexOf(COLON_DELIMTER);
        if (lastDelimiterStart < 0) {
          throw new IOException("Unexpected input format on line: " + line);
        }
        String subLine = line.substring(0, lastDelimiterStart);
View Full Code Here

Examples of org.apache.mahout.common.FileLineIterable

    FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
   
    int counter = 0;
    FilenameFilter filenameFilter = new MovieFilenameFilter();
    for (File movieFile : new File(dataDirectory, "training_set").listFiles(filenameFilter)) {
      Iterator<String> lineIterator = new FileLineIterable(movieFile, false).iterator();
      String line = lineIterator.next();
      long movieID = Long.parseLong(line.substring(0, line.length() - 1)); // strip colon
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        if (++counter % 100000 == 0) {
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

  public static FastIDSet[] parseMenWomen(File genderFile)
      throws IOException {
    FastIDSet men = new FastIDSet(50000);
    FastIDSet women = new FastIDSet(50000);
    for (String line : new FileLineIterable(genderFile)) {
      int comma = line.indexOf(',');
      char gender = line.charAt(comma + 1);
      if (gender == 'U') {
        continue;
      }
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

   
    DataSet dataset = FileInfoParser.parseFile(fs, inpath);
    DataSet.initialize(dataset);

    DataLine dl = new DataLine();
    for (CharSequence line : new FileLineIterable(new File(Resources.getResource("wdbc/wdbc.data").toURI()))) {
      dl.set(line);
      for (int index = 0; index < dataset.getNbAttributes(); index++) {
        if (dataset.isNumerical(index)) {
          CDMutationTest.assertInRange(dl.getAttribute(index), dataset.getMin(index), dataset
              .getMax(index));
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.