Package mia.recommender.ch06

Source Code of mia.recommender.ch06.WikipediaItemIDIndexMapper

package mia.recommender.ch06;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.VarIntWritable;
import org.apache.mahout.math.VarLongWritable;

public final class WikipediaItemIDIndexMapper extends
    Mapper<LongWritable,Text,VarIntWritable, VarLongWritable> {

  private static final Pattern NUMBERS = Pattern.compile("(\\d+)");

  @Override
  protected void map(LongWritable key,
                     Text value,
                     Context context) throws IOException, InterruptedException {
    String line = value.toString();
    Matcher m = NUMBERS.matcher(line);
    m.find();
    VarIntWritable index = new VarIntWritable();
    VarLongWritable itemID = new VarLongWritable();
    while (m.find()) {
      long item = Long.parseLong(m.group());
      itemID.set(item);
      index.set(idToIndex(item));
      context.write(index, itemID);
    }
  }

  static int idToIndex(long itemID) {
    return 0x7FFFFFFF & ((int) itemID ^ (int) (itemID >>> 32));
  }

}
TOP

Related Classes of mia.recommender.ch06.WikipediaItemIDIndexMapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.