Package com.cloudera.crunch.type

Examples of com.cloudera.crunch.type.PTypeFamily


    // Execute the pipeline as a MapReduce.
    pipeline.done();
  }

  public static PCollection<String> extractFilterResources(PCollection<CommonLogEntry> logs) {
    PTypeFamily tf = logs.getTypeFamily();
    return logs.parallelDo(
        "resource-extract-filter",
        new DoFn<CommonLogEntry, String>() {
          @Override
          public void process(CommonLogEntry input, Emitter<String> emitter) {
            if (!"127.0.0.1".equals(input.getRemoteAddress())) {
              emitter.emit(input.getResource());
            }
          }
        }, tf.strings());
  }
View Full Code Here


      }
    });
  }

  public static PTable<String, String> extractWordFileTable(PCollection<String> lines) {
    PTypeFamily tf = lines.getTypeFamily();
    return lines.parallelDo(
        "inverted-index",
        new DoFn<String, Pair<String, String>>() {
          String filename;

          @Override
          public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
            super.setContext(context);
            filename = ((FileSplit)
                ((MapContext) context).getInputSplit()).getPath().getName();
          }

          @Override
          public void process(String line,
                              Emitter<Pair<String, String>> emitter) {
            for (String word : StringUtils.split(line)) {
              Pair<String, String> pair =
                  Pair.of(word.toLowerCase(), filename);
              emitter.emit(pair);
            }
          }
        }, tf.tableOf(tf.strings(), tf.strings()));
  }
View Full Code Here

  public static enum LogCounters {
    LOG_LINE_ERRORS
  }

  public static PCollection<CommonLogEntry> logs(PCollection<String> lines) {
    PTypeFamily tf = lines.getTypeFamily();
    return lines
        .parallelDo(new DoFn<String, CommonLogEntry>() {
          transient ApacheCommonLogReader logReader;
          transient Logger log;

          @Override
          public void initialize() {
            logReader = new ApacheCommonLogReader();
            log = LoggerFactory.getLogger(CrunchUtils.class);
          }

          @Override
          public void process(String input, Emitter<CommonLogEntry> emitter) {
            try {
              CommonLogEntry log = logReader.decodeLine(input);
              if(log != null) {
                emitter.emit(log);
              } else {
                processingError(input, null);
              }
            } catch (IOException e) {
              processingError(input, e);
            }
          }

          void processingError(String line, @Nullable Throwable t) {
            super.getCounter(LogCounters.LOG_LINE_ERRORS).increment(1);
            log.error("Hit exception parsing line '" + line + "'", t);
          }
        }, tf.records(CommonLogEntry.class));
  }
View Full Code Here

      System.out.println(j.first() + " " + j.second().first());
    }
  }

  public static PTable<String, CommonLogEntry> logsAsIpTable(PCollection<CommonLogEntry> logs) {
    PTypeFamily tf = logs.getTypeFamily();
    return logs.parallelDo(
        "logs-to-ip-table",
        new DoFn<CommonLogEntry, Pair<String, CommonLogEntry>>() {
          @Override
          public void process(CommonLogEntry input, Emitter<Pair<String, CommonLogEntry>> emitter) {
            emitter.emit(Pair.of(input.getRemoteAddress(), input));
          }
        }, tf.tableOf(tf.strings(), tf.records(CommonLogEntry.class)));
  }
View Full Code Here

          }
        }, tf.tableOf(tf.strings(), tf.records(CommonLogEntry.class)));
  }

  public static PTable<String, String> ipsAndUsers(PCollection<String> ipUsers) {
    PTypeFamily tf = ipUsers.getTypeFamily();
    return ipUsers.parallelDo(
        "extract-users",
        new DoFn<String, Pair<String, String>>() {
          @Override
          public void process(String input, Emitter<Pair<String, String>> emitter) {
            // first token is the IP address, and second is the username
            String[] parts = StringUtils.split(input);
            emitter.emit(Pair.of(parts[0], parts[1]));
          }
        }, tf.tableOf(tf.strings(), tf.strings()));
  }
View Full Code Here

TOP

Related Classes of com.cloudera.crunch.type.PTypeFamily

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.