Package com.netflix.suro.sink.kafka

Source Code of com.netflix.suro.sink.kafka.KafkaSinkV2

/*
* Copyright 2014 Netflix, Inc.
*
*    Licensed under the Apache License, Version 2.0 (the "License");
*    you may not use this file except in compliance with the License.
*    You may obtain a copy of the License at
*
*        http://www.apache.org/licenses/LICENSE-2.0
*
*    Unless required by applicable law or agreed to in writing, software
*    distributed under the License is distributed on an "AS IS" BASIS,
*    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*    See the License for the specific language governing permissions and
*    limitations under the License.
*/

package com.netflix.suro.sink.kafka;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.netflix.servo.monitor.Monitors;
import com.netflix.suro.message.Message;
import com.netflix.suro.message.MessageContainer;
import com.netflix.suro.queue.MemoryQueue4Sink;
import com.netflix.suro.queue.MessageQueue4Sink;
import com.netflix.suro.sink.Sink;
import com.netflix.suro.sink.ThreadPoolQueuedSink;
import kafka.producer.DefaultPartitioner;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.Metric;
import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;

/**
* Kafka 0.8.2 sink, using the new Java producer rather than the old Scala producer.
* Requests are re-queued indefinitely if they fail.
*
* The configuration parameters for the new kafka producer are listed in:
* http://kafka.apache.org/documentation.html#newproducerconfigs
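*
* A sketch of a JSON sink configuration (the property names follow the
* {@literal @}JsonProperty annotations on the constructor below; the broker
* addresses and values are illustrative, not defaults):
* <pre>
* "kafka_sink": {
*     "type": "KafkaV2",
*     "client.id": "suro-client",
*     "metadata.broker.list": "broker1:9092,broker2:9092",
*     "compression.codec": "snappy",
*     "batchSize": 200,
*     "batchTimeout": 1000
* }
* </pre>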
*
* @author jbae
* @author starzia
*/
public class KafkaSinkV2 extends ThreadPoolQueuedSink implements Sink {
    public final static String TYPE = "KafkaV2";

    private String clientId;
    private final Map<String, String> keyTopicMap;

    private final KafkaProducer producer;
    private long msgId = 0;
    private AtomicLong receivedCount = new AtomicLong(0);
    private AtomicLong sentCount = new AtomicLong(0);
    private AtomicLong sentByteCount = new AtomicLong(0);
    /** number of times a message send failed without retrying */
    private AtomicLong droppedCount = new AtomicLong(0);
    /** number of times a message send failed but was requeued */
    private AtomicLong requeuedCount = new AtomicLong(0);


    private final DefaultPartitioner partitioner = new DefaultPartitioner(null); // old Scala partitioner

    @JsonCreator
    public KafkaSinkV2(
            @JsonProperty("queue4Sink") MessageQueue4Sink queue4Sink,
            @JsonProperty("client.id") String clientId,
            @JsonProperty("metadata.broker.list") String bootstrapServers,
            @JsonProperty("compression.codec") String codec,
            @JsonProperty("send.buffer.bytes") int sendBufferBytes,
            @JsonProperty("batchSize") int batchSize,
            @JsonProperty("batchTimeout") int batchTimeout,
            @JsonProperty("request.timeout.ms") int requestTimeout,
            @JsonProperty("kafka.etc") Properties etcProps,
            @JsonProperty("keyTopicMap") Map<String, String> keyTopicMap,
            @JsonProperty("jobQueueSize") int jobQueueSize,
            @JsonProperty("corePoolSize") int corePoolSize,
            @JsonProperty("maxPoolSize") int maxPoolSize,
            @JsonProperty("jobTimeout") long jobTimeout,
            @JsonProperty("pauseOnLongQueue") boolean pauseOnLongQueue
    ) {
        super(jobQueueSize, corePoolSize, maxPoolSize, jobTimeout,
                KafkaSink.class.getSimpleName() + "-" + clientId);

        Preconditions.checkNotNull(bootstrapServers);
        Preconditions.checkNotNull(clientId);

        this.clientId = clientId;
        initialize(
                "kafka_" + clientId,
                queue4Sink == null ? new MemoryQueue4Sink(10000) : queue4Sink,
                batchSize,
                batchTimeout,
                pauseOnLongQueue);

        Properties props = new Properties();
        props.put("client.id", clientId);
        // metadata.broker.list was renamed to bootstrap.servers in the new kafka producer
        props.put("bootstrap.servers", bootstrapServers);
        if (codec != null) {
            // compression.codec was renamed to compression.type in the new producer;
            // the old key would be silently ignored by KafkaProducer
            props.put("compression.type", codec);
        }
        if (sendBufferBytes > 0) {
            props.put("send.buffer.bytes", Integer.toString(sendBufferBytes));
        }
        if (requestTimeout > 0) {
            props.put("request.timeout.ms", Integer.toString(requestTimeout));
        }

        if (etcProps != null) {
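            // extra new-producer settings pass through verbatim; because they are
            // applied last, they can also override the properties set above
            // (an illustrative entry: acks=1)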
            props.putAll(etcProps);
        }

        this.keyTopicMap = keyTopicMap != null ? keyTopicMap : Maps.<String, String>newHashMap();

        producer = new KafkaProducer( props );

        Monitors.registerObject(clientId, this);
    }

    @Override
    public void writeTo(MessageContainer message) {
        long key = msgId++;
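        // keyTopicMap maps a routing key to the name of a message field whose
        // value's hashCode becomes the partition key; e.g. an illustrative mapping
        // {"video_events": "deviceId"} partitions "video_events" messages by deviceId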
        if (!keyTopicMap.isEmpty()) {
            try {
                Map<String, Object> msgMap = message.getEntity(new TypeReference<Map<String, Object>>() {});
                Object keyField = msgMap.get(keyTopicMap.get(message.getRoutingKey()));
                if (keyField != null) {
                    key = keyField.hashCode();
                }
            } catch (Exception e) {
                log.error("Exception on getting key field: " + e.getMessage());
            }
        }
        log.trace( "KafkaSink writeTo()" );
        receivedCount.incrementAndGet();
        enqueue(new SuroKeyedMessage(key, message.getMessage()));
    }

    @Override
    public void open() {
        setName(KafkaSink.class.getSimpleName() + "-" + clientId);
        start();
    }

    @Override
    protected void beforePolling() throws IOException { /*do nothing */}

    @Override
    protected void write(List<Message> msgList) {
        log.trace( "KafkaSink write() with {} messages", msgList.size() );
        // prepare "final" copies of the messages to be used in the anonymous class below
        final ArrayList<SuroKeyedMessage> msgCopies =
                new ArrayList<SuroKeyedMessage>( msgList.size() );
        for( Message m : msgList ){
            SuroKeyedMessage sKeyedMsg = (SuroKeyedMessage) m;
            msgCopies.add( new SuroKeyedMessage( sKeyedMsg.getKey(),
                                                 new Message( m.getRoutingKey(), m.getPayload() )));
        }

        // The new KafkaProducer has no interface for sending multiple messages,
        // so we loop and create lots of Runnables -- this seems inefficient, but the alternative
        // has its own problems.  If we create one "big Runnable" that loops over messages we'll
        // drain the queue4sink too quickly -- all the messages will be queued in the in-memory
        // job queue storing the Runnables.
        for( final SuroKeyedMessage m : msgCopies ) {
            senders.submit(new Runnable() {
                @Override
                public void run() {
                    String topic = m.getRoutingKey();

                    // calculate the kafka partition, with backward compatibility with old kafka producer
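                    // (the old Scala DefaultPartitioner maps a key to abs(key.hashCode()) % numPartitions,
                    // so messages that share a key keep landing on the same partition)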
                    int numPartitions = producer.partitionsFor(topic).size();
                    int partition = partitioner.partition(m.getKey(), numPartitions);

                    ProducerRecord r = new ProducerRecord( topic,
                                                           partition,
                                                           null, // don't store the key
                                                           m.getPayload() );
                    log.trace( "Will send message to Kafka" );
                    long startTimeMs = System.currentTimeMillis();
                    // send
                    Future<RecordMetadata> responseFtr = producer.send( r );
                    log.trace( "Started aysnc producer" );
                    boolean failure = true;
                    boolean retry = true;
                    if( responseFtr.isCancelled() ){
                        log.warn( "Kafka producer request was cancelled" );
                        // we assume that cancelled requests should not be retried.
                        retry = false;
                    }
                    try {
                        // wait for request to finish
                        RecordMetadata response = responseFtr.get();
                        if( response.topic() == null ){
                            log.warn( "Kafka producer got null topic in response" );
                        }
                        sentCount.incrementAndGet();
                        sentByteCount.addAndGet( m.getPayload().length );
                        failure = false;
                        retry = false;
                    }catch (InterruptedException e) {
                        // Assume that Interrupted means we're trying to shutdown so don't retry
                        log.warn( "Caught InterruptedException: "+ e );
                        retry = false;
                    }catch (ExecutionException e) {
                        // Future.get() wraps producer-side failures in ExecutionException,
                        // so unwrap the cause to recognize retriable metadata errors
                        if( e.getCause() instanceof UnknownTopicOrPartitionException ){
                            log.warn( "Caught UnknownTopicOrPartitionException for topic: " + m.getRoutingKey()
                                      +" This may be simply because KafkaProducer does not yet have information about the brokers."
                                      +" Request will be retried.");
                        }else{
                            log.warn( "Caught ExecutionException: "+ e );
                        }
                    }catch (Exception e){
                        log.warn( "Caught Exception: "+e );
                    }
                    long durationMs = System.currentTimeMillis() - startTimeMs;

                    if( failure ){
                        log.warn( "Kafka producer send failed after {} milliseconds", durationMs );
                        if( retry ){
                            // count as requeued only when the message is actually retried
                            requeuedCount.incrementAndGet();
                            enqueue( m );
                        }else{
                            log.info("Dropped message");
                            droppedCount.incrementAndGet();
                        }
                    }else{
                        log.trace( "Kafka producer send succeeded after {} milliseconds", durationMs );
                    }
                }
            });
        }
    }

    @Override
    protected void innerClose() {
        super.innerClose();

        producer.close();
    }

    @Override
    public String recvNotice() {
        return null;
    }

    @Override
    public String getStat() {
        Map<String,? extends Metric> metrics = producer.metrics();
        StringBuilder sb = new StringBuilder();
        // add kafka producer stats, which are rates
        for( Map.Entry<String,? extends Metric> e : metrics.entrySet() ){
            sb.append("kafka.").append(e.getKey()).append(": ").append(e.getValue().value()).append('\n');
        }
        // also report our counters
        sb.append("messages-in-queue4sink: ").append( this.queue4Sink.size() ).append('\n');
        sb.append("queued-jobs: ").append( this.jobQueue.size() ).append('\n');
        sb.append("active-threads: ").append( this.senders.getActiveCount() ).append('\n');
        sb.append("received-messages: ").append( this.receivedCount.get() ).append('\n');
        sb.append("sent-messages: ").append( this.sentCount.get() ).append('\n');
        sb.append("sent-bytes: ").append( this.sentByteCount.get() ).append('\n');
        sb.append("dropped-messages: ").append( this.droppedCount.get() ).append('\n');
        sb.append("requeued-messages: ").append( this.requeuedCount.get() ).append('\n');

        return sb.toString();
    }
}
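
Example Usage of com.netflix.suro.sink.kafka.KafkaSinkV2

A minimal usage sketch, not part of the original source. It assumes
com.netflix.suro.message.DefaultMessageContainer(Message, ObjectMapper) as the
MessageContainer implementation, a broker reachable at localhost:9092, and
illustrative batch/pool parameter values.

import com.fasterxml.jackson.databind.ObjectMapper;
import com.netflix.suro.message.DefaultMessageContainer;
import com.netflix.suro.message.Message;
import com.netflix.suro.sink.kafka.KafkaSinkV2;

public class KafkaSinkV2Example {
    public static void main(String[] args) {
        ObjectMapper jsonMapper = new ObjectMapper();
        KafkaSinkV2 sink = new KafkaSinkV2(
                null,              // queue4Sink: null falls back to MemoryQueue4Sink(10000)
                "suro-example",    // client.id
                "localhost:9092",  // metadata.broker.list (becomes bootstrap.servers)
                null,              // compression.codec: null leaves the producer default
                0,                 // send.buffer.bytes: <= 0 leaves the producer default
                100,               // batchSize
                1000,              // batchTimeout in milliseconds
                0,                 // request.timeout.ms: <= 0 leaves the producer default
                null,              // kafka.etc: no extra producer properties
                null,              // keyTopicMap: disables key-field partitioning
                100,               // jobQueueSize
                1,                 // corePoolSize
                3,                 // maxPoolSize
                5000L,             // jobTimeout
                false);            // pauseOnLongQueue
        sink.open();
        sink.writeTo(new DefaultMessageContainer(
                new Message("example_topic", "hello suro".getBytes()), jsonMapper));
        sink.close();
    }
}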