Package org.apache.mahout.common

Examples of org.apache.mahout.common.FileLineIterable


        Path unqualifiedItemsFilePath = new Path(itemFilePathString);
        FileSystem fs = FileSystem.get(unqualifiedItemsFilePath.toUri(), jobConf);
        itemsToRecommendFor = new FastIDSet();
        Path itemsFilePath = unqualifiedItemsFilePath.makeQualified(fs);
        in = fs.open(itemsFilePath);
        for (String line : new FileLineIterable(in)) {
          itemsToRecommendFor.add(Long.parseLong(line));
        }
      }
    } catch (IOException ioe) {
      throw new IllegalStateException(ioe);
View Full Code Here


  }

  private static void loadPopulation(FileSystem fs, Path f,
                                     Collection<DummyCandidate> population) throws IOException {
    FSDataInputStream in = fs.open(f);
    for (String line : new FileLineIterable(in)) {
      population.add(StringUtils.<DummyCandidate>fromString(line));
    }
  }
View Full Code Here

   }


  static Map<Long,List<RecommendedItem>> readRecommendations(File file) throws IOException {
    Map<Long,List<RecommendedItem>> recommendations = new HashMap<Long,List<RecommendedItem>>();
    Iterable<String> lineIterable = new FileLineIterable(file);
    for (String line : lineIterable) {

      String[] keyValue = line.split("\t");
      long userID = Long.parseLong(keyValue[0]);
      String[] tokens = keyValue[1].replaceAll("\\[", "")
View Full Code Here

    Charset charset = Charset.forName("UTF-8");
    BayesFileFormatter.collapse("animal", analyzer, input, charset, new File(out, "animal"));
    files = out.listFiles();
    assertEquals("files Size: " + files.length + " is not: " + 1, 1, files.length);
    int count = 0;
    for (String line : new FileLineIterable(files[0])) {
      assertTrue("line does not start with label", line.startsWith("animal"));
      count++;
    }
    assertEquals(count + " does not equal: " + WORDS.length, count, WORDS.length);
  }
View Full Code Here

    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }

    Set<String> categories = new HashSet<String>();
    for (String line : new FileLineIterable(new File(catFile))) {
      categories.add(line.trim().toLowerCase());
    }

    DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories));
View Full Code Here

      itemIDPadded.insert(0, '0');
    }
    List<Preference> prefs = new ArrayList<Preference>();
    File movieFile = new File(new File(dataDirectory, "training_set"), "mv_00" + itemIDPadded + ".txt");
    try {
      for (String line : new FileLineIterable(movieFile, true)) {
        int firstComma = line.indexOf((int) ',');
        Integer userID = Integer.valueOf(line.substring(0, firstComma));
        int secondComma = line.indexOf((int) ',', firstComma + 1);
        float rating = Float.parseFloat(line.substring(firstComma + 1, secondComma));
        prefs.add(new GenericPreference(userID, itemID, rating));
View Full Code Here

    Map<String, List<String>> byUserEntryCache = new FastMap<String, List<String>>(100000);

    for (File byItemFile : byItemDirectory.listFiles()) {
      log.info("Processing {}", byItemFile);
      Iterator<String> lineIterator = new FileLineIterable(byItemFile, false).iterator();
      String line = lineIterator.next();
      String movieIDString = line.substring(0, line.length() - 1);
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        int firstComma = line.indexOf((int) ',');
View Full Code Here

    FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();

    int counter = 0;
    FilenameFilter filenameFilter = new MovieFilenameFilter();
    for (File movieFile : new File(dataDirectory, "training_set").listFiles(filenameFilter)) {
      Iterator<String> lineIterator = new FileLineIterable(movieFile, false).iterator();
      String line = lineIterator.next();
      long movieID = Long.parseLong(line.substring(0, line.length() - 1)); // strip colon
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        if (++counter % 100000 == 0) {
View Full Code Here

  }

  private static void loadPopulation(FileSystem fs, Path f,
                                     Collection<DummyCandidate> population) throws IOException {
    FSDataInputStream in = fs.open(f);
    for (String line : new FileLineIterable(in)) {
      population.add((DummyCandidate) StringUtils.fromString(line));
    }
  }
View Full Code Here

    FPGrowth<String> fp = new FPGrowth<String>();
    Set<String> features = new HashSet<String>();

    fp.generateTopKFrequentPatterns(new StringRecordIterator(
        new FileLineIterable(new File(input), encoding, false), pattern), fp
        .generateFList(new StringRecordIterator(new FileLineIterable(new File(
            input), encoding, false), pattern), minSupport), minSupport,
        maxHeapSize, features, new StringOutputConvertor(
            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)));
    writer.close();
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.FileLineIterable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.