Examples of FileLineIterator


Examples of org.apache.mahout.common.iterator.FileLineIterator

    if (hasPrefValues) {

      if (loadFreshData) {

        FastByIDMap<Collection<Preference>> data = new FastByIDMap<Collection<Preference>>();
        FileLineIterator iterator = new FileLineIterator(dataFile, false);
        processFile(iterator, data, timestamps, false);

        for (File updateFile : findUpdateFilesAfter(newLastModified)) {
          processFile(new FileLineIterator(updateFile, false), data, timestamps, false);
        }

        return new GenericDataModel(GenericDataModel.toDataMap(data, true), timestamps);

      } else {

        FastByIDMap<PreferenceArray> rawData = ((GenericDataModel) delegate).getRawUserData();

        for (File updateFile : findUpdateFilesAfter(Math.max(oldLastUpdateFileModifieid, newLastModified))) {
          processFile(new FileLineIterator(updateFile, false), rawData, timestamps, true);
        }

        return new GenericDataModel(rawData, timestamps);

      }

    } else {

      if (loadFreshData) {

        FastByIDMap<FastIDSet> data = new FastByIDMap<FastIDSet>();
        FileLineIterator iterator = new FileLineIterator(dataFile, false);
        processFileWithoutID(iterator, data, timestamps);

        for (File updateFile : findUpdateFilesAfter(newLastModified)) {
          processFileWithoutID(new FileLineIterator(updateFile, false), data, timestamps);
        }

        return new GenericBooleanPrefDataModel(data, timestamps);

      } else {

        FastByIDMap<FastIDSet> rawData = ((GenericBooleanPrefDataModel) delegate).getRawUserData();

        for (File updateFile : findUpdateFilesAfter(Math.max(oldLastUpdateFileModifieid, newLastModified))) {
          processFileWithoutID(new FileLineIterator(updateFile, false), rawData, timestamps);
        }

        return new GenericBooleanPrefDataModel(rawData, timestamps);

      }
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterator

  }
 
  @Override
  protected DataModel buildModel() throws IOException {
    FastByIDMap<Collection<Preference>> data = new FastByIDMap<Collection<Preference>>();
    FileLineIterator iterator = new FileLineIterator(getDataFile(), false);
    FastByIDMap<FastByIDMap<Long>> timestamps = new FastByIDMap<FastByIDMap<Long>>();
    processFile(iterator, data, timestamps, false);
    return new GenericDataModel(GenericDataModel.toDataMap(data, true));
  }
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterator

  private final Iterator<GenericItemSimilarity.ItemItemSimilarity> delegate;

  FileItemItemSimilarityIterator(File similaritiesFile) throws IOException {
    delegate = Iterators.transform(
        new FileLineIterator(similaritiesFile),
        new Function<String, GenericItemSimilarity.ItemItemSimilarity>() {
          @Override
          public GenericItemSimilarity.ItemItemSimilarity apply(String from) {
            String[] tokens = SEPARATOR.split(from);
            return new GenericItemSimilarity.ItemItemSimilarity(Long.parseLong(tokens[0]),
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterator

      try {

        averageDiffs.clear();
        allRecommendableItemIDs.clear();
       
        FileLineIterator iterator = new FileLineIterator(dataFile, false);
        String firstLine = iterator.peek();
        while (firstLine.isEmpty() || firstLine.charAt(0) == COMMENT_CHAR) {
          iterator.next();
          firstLine = iterator.peek();
        }
        long averageCount = 0L;
        while (iterator.hasNext()) {
          averageCount = processLine(iterator.next(), averageCount);
        }
       
        pruneInconsequentialDiffs();
        updateAllRecommendableItems();
       
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterator

   
    StringBuilder content = new StringBuilder();
    content.append(header);
    NumberFormat decimalFormatter = new DecimalFormat("0000");
    File dumpFile = new File(dumpFilePath);
    FileLineIterator it;
    if (dumpFilePath.endsWith(".bz2")) {
      // default compression format from http://download.wikimedia.org
      CompressionCodec codec = new BZip2Codec();
      it = new FileLineIterator(codec.createInputStream(new FileInputStream(dumpFile)));
    } else {
      // assume the user has previously de-compressed the dump file
      it = new FileLineIterator(dumpFile);
    }
    int filenumber = 0;
    while (it.hasNext()) {
      String thisLine = it.next();
      if (thisLine.trim().startsWith("<page>")) {
        boolean end = false;
        while (!thisLine.trim().startsWith("</page>")) {
          content.append(thisLine).append('\n');
          if (it.hasNext()) {
            thisLine = it.next();
          } else {
            end = true;
            break;
          }
        }
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterator

      FSDataOutputStream outputStream = fs.create(new Path(outputPath, "trainingSet/ratings.tsv"));
      writer = new BufferedWriter(new OutputStreamWriter(outputStream, Charsets.UTF_8));

      int ratingsProcessed = 0;
      for (File movieRatings : new File(trainingDataDir).listFiles()) {
        FileLineIterator lines = null;
        try  {
          lines = new FileLineIterator(movieRatings);
          boolean firstLineRead = false;
          String movieID = null;
          while (lines.hasNext()) {
            String line = lines.next();
            if (firstLineRead) {
              String[] tokens = SEPARATOR.split(line);
              String userID = tokens[0];
              String rating = tokens[1];
              writer.write(userID + TAB + movieID + TAB + rating + NEWLINE);
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterator

  public DataFileIterator(File dataFile) throws IOException {
    if (dataFile == null || dataFile.isDirectory() || !dataFile.exists()) {
      throw new IllegalArgumentException("Bad data file: " + dataFile);
    }
    lineIterator = new FileLineIterator(dataFile);
  }
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterator

      try {

        averageDiffs.clear();
        allRecommendableItemIDs.clear();
       
        FileLineIterator iterator = new FileLineIterator(dataFile, false);
        String firstLine = iterator.peek();
        while (firstLine.length() == 0 || firstLine.charAt(0) == COMMENT_CHAR) {
          iterator.next();
          firstLine = iterator.peek();
        }
        char delimiter = FileDataModel.determineDelimiter(firstLine);
        long averageCount = 0L;
        while (iterator.hasNext()) {
          averageCount = processLine(iterator.next(), delimiter, averageCount);
        }
       
        pruneInconsequentialDiffs();
        updateAllRecommendableItems();
       
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterator

   * @param path
   *          data path
   */
  public static int[] extractLabels(Dataset dataset, FileSystem fs, Path path) throws IOException {
    FSDataInputStream input = fs.open(path);
    FileLineIterator iterator = new FileLineIterator(input);
   
    int[] labels = new int[dataset.nbInstances()];
    DataConverter converter = new DataConverter(dataset);
   
    int index = 0;
   
    while (iterator.hasNext()) {
      labels[index++] = converter.convert(0, iterator.next()).getLabel();
    }
   
    iterator.close();
   
    return labels;
  }
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterator

    this.dataFile = dataFile.getAbsoluteFile();
    this.lastModified = dataFile.lastModified();
    this.lastUpdateFileModified = readLastUpdateFileModified();

    FileLineIterator iterator = new FileLineIterator(dataFile, false);
    String firstLine = iterator.peek();
    while (firstLine.length() == 0 || firstLine.charAt(0) == COMMENT_CHAR) {
      iterator.next();
      firstLine = iterator.peek();
    }
    iterator.close();

    delimiter = determineDelimiter(firstLine);
    delimiterPattern = Pattern.compile(String.valueOf(delimiter));
    String[] firstLineSplit = delimiterPattern.split(firstLine);
    // If preference value exists and isn't empty then the file is specifying pref values
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.