package com.linkedin.camus.etl.kafka.common;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.common.TopicAndPartition;
import kafka.javaapi.OffsetRequest;
import kafka.javaapi.OffsetResponse;
import kafka.javaapi.consumer.SimpleConsumer;
import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.log4j.Logger;
import com.linkedin.camus.etl.kafka.CamusJob;
import com.linkedin.camus.workallocater.CamusRequest;
/**
* A class that represents the kafka pull request.
*
* The class is a container for topic, leaderId, partition, uri and offset. It is
* used in reading and writing the sequence files used for the extraction job.
*
* @author Richard Park
*/
public class EtlRequest implements CamusRequest {
    private static final Logger log = Logger.getLogger(EtlRequest.class);

    private static final long DEFAULT_OFFSET = 0;

    // Sentinel values meaning "not yet fetched from the broker".
    private static final long LATEST_OFFSET_NOT_SET = -1;
    private static final long EARLIEST_OFFSET_NOT_SET = -2;

    // TODO: make these broker-connection parameters configurable
    private static final int CONSUMER_TIMEOUT_MS = 60000;
    private static final int CONSUMER_BUFFER_SIZE = 1024 * 1024;
    private static final String CLIENT_ID = "hadoop-etl";

    private JobContext context = null;
    private String topic = "";
    private String leaderId = "";
    private int partition = 0;
    private URI uri = null;
    private long offset = DEFAULT_OFFSET;
    private long latestOffset = LATEST_OFFSET_NOT_SET;
    private long earliestOffset = EARLIEST_OFFSET_NOT_SET;
    private long avgMsgSize = 1024;

    /** No-arg constructor required for Hadoop Writable deserialization. */
    public EtlRequest() {
    }

    /**
     * Copy constructor. Copies all cached state (offsets, average message size)
     * but NOT the {@code context}, matching the original behavior.
     *
     * @param other the request to copy
     */
    public EtlRequest(EtlRequest other) {
        this.topic = other.topic;
        this.leaderId = other.leaderId;
        this.partition = other.partition;
        this.uri = other.uri;
        this.offset = other.offset;
        this.latestOffset = other.latestOffset;
        this.earliestOffset = other.earliestOffset;
        this.avgMsgSize = other.avgMsgSize;
    }

    /**
     * Constructor with the uri set to null and offset set to the default.
     * Both of these attributes can be set later. Topic and partition are
     * sufficient to ensure uniqueness (see {@link #equals(Object)}).
     *
     * @param context the Hadoop job context
     * @param topic the topic name
     * @param leaderId the leader broker for this partition and topic
     * @param partition the partition to pull
     */
    public EtlRequest(JobContext context, String topic, String leaderId, int partition) {
        this(context, topic, leaderId, partition, null, DEFAULT_OFFSET);
    }

    /**
     * Constructor with the offset set to the default.
     *
     * @param context the Hadoop job context
     * @param topic the topic name
     * @param leaderId the leader broker for this topic and partition
     * @param partition the partition to pull
     * @param brokerUri the uri for the broker
     */
    public EtlRequest(JobContext context, String topic, String leaderId, int partition, URI brokerUri) {
        this(context, topic, leaderId, partition, brokerUri, DEFAULT_OFFSET);
    }

    /**
     * Constructor for the full kafka pull job. Neither the brokerUri nor offset
     * are used to ensure uniqueness.
     *
     * @param context the Hadoop job context
     * @param topic the topic name
     * @param leaderId the leader broker for this topic and partition
     * @param partition the partition to pull
     * @param brokerUri the uri for the broker
     * @param offset the offset to start pulling from
     */
    public EtlRequest(JobContext context, String topic, String leaderId, int partition,
            URI brokerUri, long offset) {
        this.context = context;
        this.topic = topic;
        this.leaderId = leaderId;
        this.uri = brokerUri;
        this.partition = partition;
        setOffset(offset);
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#setLatestOffset(long)
     */
    @Override
    public void setLatestOffset(long latestOffset) {
        this.latestOffset = latestOffset;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#setEarliestOffset(long)
     */
    @Override
    public void setEarliestOffset(long earliestOffset) {
        this.earliestOffset = earliestOffset;
    }

    /**
     * Sets the average message size (bytes) used by {@link #estimateDataSize()}.
     *
     * @param size average message size in bytes
     */
    public void setAvgMsgSize(long size) {
        this.avgMsgSize = size;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#setOffset(long)
     */
    @Override
    public void setOffset(long offset) {
        this.offset = offset;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#setURI(java.net.URI)
     */
    @Override
    public void setURI(URI uri) {
        this.uri = uri;
    }

    /**
     * Retrieve the leader broker node id.
     *
     * @return the leader id, never null (defaults to the empty string)
     */
    public String getLeaderId() {
        return this.leaderId;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#getTopic()
     */
    @Override
    public String getTopic() {
        return this.topic;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#getURI()
     */
    @Override
    public URI getURI() {
        return this.uri;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#getPartition()
     */
    @Override
    public int getPartition() {
        return this.partition;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#getOffset()
     */
    @Override
    public long getOffset() {
        return this.offset;
    }

    /**
     * Sets the leader broker node id.
     *
     * @param leaderId the leader broker id
     */
    public void setLeaderId(String leaderId) {
        this.leaderId = leaderId;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#isValidOffset()
     */
    @Override
    public boolean isValidOffset() {
        return this.offset >= 0;
    }

    @Override
    public String toString() {
        // NOTE: getEarliestOffset()/getLastOffset() may hit the broker if the
        // offsets have not been fetched yet and a uri is set.
        return topic + "\turi:" + (uri != null ? uri.toString() : "") + "\tleader:" + leaderId
                + "\tpartition:" + partition
                + "\tearliest_offset:" + getEarliestOffset()
                + "\toffset:" + offset
                + "\tlatest_offset:" + getLastOffset()
                + "\tavg_msg_size:" + avgMsgSize
                + "\testimated_size:" + estimateDataSize();
    }

    /**
     * Equality is based on topic and partition only, consistent with
     * {@link #hashCode()}; uri, leader and offsets are intentionally ignored.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof EtlRequest)) return false;
        EtlRequest that = (EtlRequest) o;
        if (partition != that.partition) return false;
        if (!topic.equals(that.topic)) return false;
        return true;
    }

    @Override
    public int hashCode() {
        int result = topic.hashCode();
        result = 31 * result + partition;
        return result;
    }

    /**
     * Returns a copy of this request.
     * NOTE(review): unlike the copy constructor, this does not carry over the
     * cached latestOffset/earliestOffset/avgMsgSize — the copy will refetch
     * them from the broker on demand. Confirm whether that is intended.
     */
    @Override
    public CamusRequest clone() {
        return new EtlRequest(context, topic, leaderId, partition, uri, offset);
    }

    /**
     * Queries the leader broker for the single offset before the given time,
     * always closing the consumer connection (the original code leaked it on
     * exception) and failing with a descriptive exception when the broker
     * returns no offsets (the original logged and then crashed with an
     * ArrayIndexOutOfBoundsException).
     *
     * @param time target time (e.g. kafka.api.OffsetRequest.LatestTime())
     * @return the offset reported by the broker
     * @throws RuntimeException if the broker returns no offsets
     */
    private long fetchOffset(long time) {
        SimpleConsumer consumer = new SimpleConsumer(uri.getHost(), uri.getPort(),
                CONSUMER_TIMEOUT_MS, CONSUMER_BUFFER_SIZE, CLIENT_ID);
        try {
            Map<TopicAndPartition, PartitionOffsetRequestInfo> offsetInfo =
                    new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
            offsetInfo.put(new TopicAndPartition(topic, partition),
                    new PartitionOffsetRequestInfo(time, 1));
            OffsetResponse response = consumer.getOffsetsBefore(new OffsetRequest(offsetInfo,
                    kafka.api.OffsetRequest.CurrentVersion(), CLIENT_ID));
            long[] offsets = response.offsets(topic, partition);
            if (offsets.length == 0) {
                String message = "Broker returned no offsets for topic : " + topic
                        + " and partition " + partition + " at time " + time;
                log.error(message);
                throw new RuntimeException(message);
            }
            return offsets[0];
        } finally {
            // Always release the broker connection, even if the request fails.
            consumer.close();
        }
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#getEarliestOffset()
     */
    @Override
    public long getEarliestOffset() {
        // Lazily fetch and cache; without a uri the sentinel value is returned.
        if (this.earliestOffset == EARLIEST_OFFSET_NOT_SET && uri != null) {
            this.earliestOffset = fetchOffset(kafka.api.OffsetRequest.EarliestTime());
        }
        return this.earliestOffset;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#getLastOffset()
     */
    @Override
    public long getLastOffset() {
        // Lazily fetch and cache; without a uri the sentinel value is returned.
        if (this.latestOffset == LATEST_OFFSET_NOT_SET && uri != null) {
            return getLastOffset(kafka.api.OffsetRequest.LatestTime());
        }
        return this.latestOffset;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#getLastOffset(long)
     */
    @Override
    public long getLastOffset(long time) {
        this.latestOffset = fetchOffset(time);
        return this.latestOffset;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#estimateDataSize()
     */
    @Override
    public long estimateDataSize() {
        long endOffset = getLastOffset();
        return (endOffset - offset) * avgMsgSize;
    }

    /* (non-Javadoc)
     * @see com.linkedin.camus.etl.kafka.common.CamusRequest#estimateDataSize(long)
     */
    @Override
    public long estimateDataSize(long endTime) {
        long endOffset = getLastOffset(endTime);
        return (endOffset - offset) * avgMsgSize;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Field order must mirror write(). UTF8 is deprecated but kept for
        // wire-format compatibility with existing sequence files.
        topic = UTF8.readString(in);
        leaderId = UTF8.readString(in);
        String str = UTF8.readString(in);
        if (!str.isEmpty())
            try {
                uri = new URI(str);
            } catch (URISyntaxException e) {
                throw new RuntimeException(e);
            }
        partition = in.readInt();
        offset = in.readLong();
        latestOffset = in.readLong();
        // NOTE(review): earliestOffset and avgMsgSize are not serialized —
        // presumably recomputed on demand; confirm before adding them.
    }

    @Override
    public void write(DataOutput out) throws IOException {
        UTF8.writeString(out, topic);
        UTF8.writeString(out, leaderId);
        if (uri != null)
            UTF8.writeString(out, uri.toString());
        else
            UTF8.writeString(out, ""); // empty string marks "no uri" for readFields()
        out.writeInt(partition);
        out.writeLong(offset);
        out.writeLong(latestOffset);
    }
}