Examples of StringRecordIterator


Examples of org.apache.mahout.common.iterator.StringRecordIterator

    final Map<Set<String>,Long> seqResult = Maps.newHashMap();
   
    FPGrowthObj<String> fpSeq = new FPGrowthObj<String>();
    fpSeq.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

      fpSeq.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
           .openStream()), "\\s+"), minSupport), minSupport, 1000000,
      null,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
        @Override
View Full Code Here

Examples of org.apache.mahout.common.iterator.StringRecordIterator

    FPGrowthObj<String> fp = new FPGrowthObj<String>();
   
    String inputFilename = "FPGsynth.dat";
    int minSupport = 50;

    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
                                                                                                  inputFilename).openStream()), "\\s+");
    int patternCnt_10_13_1669 = 0;
    int patternCnt_10_13 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("10") && items.contains("13")) {
        patternCnt_10_13++;
        if (items.contains("1669")) {
          patternCnt_10_13_1669++;
        }
      }
    }
   
    if (patternCnt_10_13_1669 < minSupport)
      throw new IllegalStateException("the test is broken or data is missing ("
                                      + patternCnt_10_13_1669+", "
                                      + patternCnt_10_13+")");

    final Map<Set<String>,Long> results = Maps.newHashMap();
   
    Set<String> features_10_13 = new HashSet<String>();
    features_10_13.add("10");
    features_10_13.add("13");

    Set<String> returnableFeatures = new HashSet<String>();
    returnableFeatures.add("10");
    returnableFeatures.add("13");
    returnableFeatures.add("1669");
   
    fp.generateTopKFrequentPatterns(
                                    new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

                                    fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                   .openStream()), "\\s+"), minSupport), minSupport, 100000,
                                    returnableFeatures,
                                    new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
                                      @Override
View Full Code Here

Examples of org.apache.mahout.common.iterator.StringRecordIterator

    FPGrowth<String> fp1 = new FPGrowth<String>();

    final Map<Set<String>,Long> results1 = Maps.newHashMap();
   
    fp1.generateTopKFrequentPatterns(
                                     new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

                                     fp1.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                     .openStream()), "\\s+"), minSupport), minSupport, 1000000,
                                     returnableFeatures,
                                     new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
                                       @Override
                                         public void collect(String key, List<Pair<List<String>,Long>> value) {
         
                                         for (Pair<List<String>,Long> v : value) {
                                           List<String> l = v.getFirst();
                                           results1.put(new HashSet<String>(l), v.getSecond());
                                           System.out.println("found pat ["+v.getSecond()+"]: "+ v.getFirst());
                                         }
                                       }
       
                                     }, new StatusUpdater() {
       
                                         @Override
                                           public void update(String status) {}
                                       });

    FPGrowthObj<String> fp2 = new FPGrowthObj<String>();
    final Map<Set<String>,Long> initialResults2 = Maps.newHashMap();
    fp2.generateTopKFrequentPatterns(
                                     new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

                                     fp2.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                     .openStream()), "\\s+"), minSupport), minSupport, 1000000,
                                     new HashSet<String>(),
                                     new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
                                       @Override
View Full Code Here

Examples of org.apache.mahout.common.iterator.StringRecordIterator

        = new org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String>();
      Collection<String> features = new HashSet<String>();

      try {
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                new ContextStatusUpdater(null));
        } finally {
          Closeables.closeQuietly(writer);
        }
    } else {
      FPGrowth<String> fp = new FPGrowth<String>();
      Collection<String> features = new HashSet<String>();
      try {
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
View Full Code Here

Examples of org.apache.mahout.common.iterator.StringRecordIterator

      try {
        inputStream = fs.open(input);
        inputStreamAgain = fs.open(input);
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(inputStreamAgain, encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                new ContextStatusUpdater(null));
      } finally {
        Closeables.close(writer, false);
        Closeables.close(inputStream, true);
        Closeables.close(inputStreamAgain, true);
      }
    } else {
      FPGrowth<String> fp = new FPGrowth<String>();


      inputStream = fs.open(input);
      inputStreamAgain = fs.open(input);
      try {
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(inputStreamAgain, encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
View Full Code Here

Examples of org.apache.mahout.common.iterator.StringRecordIterator

    File outputDir2 = getTestTempDir("frequentpatterns2");
    paramsImpl2.set(PFPGrowth.OUTPUT, outputDir2.getAbsolutePath());

    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();
     
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
        transactions.add(next.getFirst());
      }
     
      for (List<String> transaction : transactions) {
        String sep = "";
View Full Code Here

Examples of org.apache.mahout.common.iterator.StringRecordIterator

  @Test
  public void testSpecificCaseFromRetailDataMinSup500() throws IOException {
    FPGrowthObj<String> fp = new FPGrowthObj<String>();
   
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail.dat").openStream()), "\\s+");
    int pattern_41_36_39 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("41") && items.contains("36") && items.contains("39")) {
        pattern_41_36_39++;
      }
    }
   
    final Map<Set<String>,Long> results = Maps.newHashMap();
   
    Set<String> returnableFeatures = Sets.newHashSet();
    returnableFeatures.add("41");
    returnableFeatures.add("36");
    returnableFeatures.add("39");
   
    fp.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat").openStream()), "\\s+"),

      fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat")
          .openStream()), "\\s+"), 500), 500, 1000, returnableFeatures,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
        @Override
        public void collect(String key, List<Pair<List<String>,Long>> value) {
View Full Code Here

Examples of org.apache.mahout.common.iterator.StringRecordIterator

    File input = new File(inputDir, "synth_test.txt");
    params.set(PFPGrowth.INPUT, input.getAbsolutePath());
    params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "FPGsynth.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();
     
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
        transactions.add(next.getFirst());
      }
     
      for (List<String> transaction : transactions) {
        String sep = "";
View Full Code Here

Examples of org.apache.mahout.common.iterator.StringRecordIterator

    final Map<Set<String>,Long> seqResult = Maps.newHashMap();
   
    FPGrowthObj<String> fpSeq = new FPGrowthObj<String>();
    fpSeq.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

      fpSeq.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
           .openStream()), "\\s+"), minSupport), minSupport, 1000000,
      null,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
        @Override
View Full Code Here

Examples of org.apache.mahout.common.iterator.StringRecordIterator

  public void testSpecificCasesFromSynthData() throws IOException {
    FPGrowthObj<String> fp = new FPGrowthObj<String>();
   
    String inputFilename = "FPGsynth.dat";

    StringRecordIterator it =
        new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+");
    int patternCnt_10_13_1669 = 0;
    int patternCnt_10_13 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("10") && items.contains("13")) {
        patternCnt_10_13++;
        if (items.contains("1669")) {
          patternCnt_10_13_1669++;
        }
      }
    }

    int minSupport = 50;
    if (patternCnt_10_13_1669 < minSupport) {
      throw new IllegalStateException("the test is broken or data is missing ("
                                          + patternCnt_10_13_1669 + ", "
                                          + patternCnt_10_13 + ')');
    }

    final Map<Set<String>,Long> results = Maps.newHashMap();
   
    Set<String> features_10_13 = Sets.newHashSet();
    features_10_13.add("10");
    features_10_13.add("13");

    Set<String> returnableFeatures = Sets.newHashSet();
    returnableFeatures.add("10");
    returnableFeatures.add("13");
    returnableFeatures.add("1669");
   
    fp.generateTopKFrequentPatterns(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

                                    fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                   .openStream()), "\\s+"), minSupport), minSupport, 100000,
                                    returnableFeatures,
                                    new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
                                      @Override
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.