Package com.netflix.bdp.s3mper.alert.impl

Source Code of com.netflix.bdp.s3mper.alert.impl.CloudWatchAlertDispatcher

/*
*
*  Copyright 2013 Netflix, Inc.
*
*     Licensed under the Apache License, Version 2.0 (the "License");
*     you may not use this file except in compliance with the License.
*     You may obtain a copy of the License at
*
*         http://www.apache.org/licenses/LICENSE-2.0
*
*     Unless required by applicable law or agreed to in writing, software
*     distributed under the License is distributed on an "AS IS" BASIS,
*     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*     See the License for the specific language governing permissions and
*     limitations under the License.
*
*/


package com.netflix.bdp.s3mper.alert.impl;

import com.netflix.bdp.s3mper.alert.AlertDispatcher;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.cloudwatch.AmazonCloudWatchAsyncClient;
import com.amazonaws.services.cloudwatch.model.MetricDatum;
import com.amazonaws.services.cloudwatch.model.PutMetricDataRequest;
import com.amazonaws.services.cloudwatch.model.StandardUnit;
import com.amazonaws.services.sqs.AmazonSQSClient;
import com.amazonaws.services.sqs.model.GetQueueUrlRequest;
import com.amazonaws.services.sqs.model.SendMessageRequest;
import com.netflix.bdp.s3mper.alert.impl.AbstractMessage.QueryType;
import java.io.IOException;
import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.log4j.Logger;
import org.codehaus.jackson.map.ObjectMapper;


/**
* Dispatches CloudWatch Metrics and SQS Messages on the event of consistency
* failure or timeout.
*
* @author dweeks
*/
public class CloudWatchAlertDispatcher implements AlertDispatcher {
    private static final Logger log = Logger.getLogger(CloudWatchAlertDispatcher.class.getName());
   
    private AmazonCloudWatchAsyncClient cloudWatch;
    private AmazonSQSClient sqs;
   
    private String namespace = "netflix";
    private String cloudWatchConsistencyMetric = "com.netflix.bdp.s3mper.consistency.failures";
    private String cloudWatchTimeoutMetric = "com.netflix.bdp.s3mper.consistency.timeout";
   
    private String consistencyQueue = "s3mper-alert-queue";
    private String timeoutQueue = "s3mper-timeout-queue";
    private String notificationQueue = "s3mper-notification-queue";
   
    private String consistencyQueueUrl = "";
    private String timeoutQueueUrl = "";
    private String notificationQueueUrl = "";
   
    private boolean reportingDisabled = false;
   
    private URI uri;
    private Configuration conf;
   
    private int pathReportLimit = 10;
    private int traceDepth = 15;
   
    @Override
    public void init(URI uri, Configuration conf) {
        this.uri = uri;
        this.conf = conf;
    }
   
    /**
     * Don't initialize the SQS queues unless we actually need to send a message.
     */
    private void lazyInit() {
        String keyId = conf.get("fs."+uri.getScheme()+".awsAccessKeyId");
        String keySecret = conf.get("fs."+uri.getScheme()+".awsSecretAccessKey");
       
        //An override option for accessing across accounts
        keyId = conf.get("fs."+uri.getScheme()+".override.awsAccessKeyId", keyId);
        keySecret = conf.get("fs."+uri.getScheme()+".override.awsSecretAccessKey", keySecret);
       
        synchronized(this) {
            if(cloudWatch == null) {
                initCloudWatch(keyId, keySecret);
            }
           
            if(sqs == null) {
                initSqs(keyId, keySecret);
            }
        }
       
    }
   
    private void initCloudWatch(String keyId, String keySecret) {
        log.debug("Initializing CloudWatch Client");
        cloudWatch = new AmazonCloudWatchAsyncClient(new BasicAWSCredentials(keyId, keySecret));
    }
   
    private void initSqs(String keyId, String keySecret) {
        log.debug("Initializing SQS Client");
        sqs = new AmazonSQSClient(new BasicAWSCredentials(keyId, keySecret));
       
        //SQS Consistency Queue
        consistencyQueue = conf.get("s3mper.alert.sqs.queue", consistencyQueue);
        consistencyQueueUrl = sqs.getQueueUrl(new GetQueueUrlRequest(consistencyQueue)).getQueueUrl();
       
        //SQS Timeout Queue
        timeoutQueue = conf.get("s3mper.timeout.sqs.queue", timeoutQueue);
        timeoutQueueUrl = sqs.getQueueUrl(new GetQueueUrlRequest(timeoutQueue)).getQueueUrl();
       
        //SQS Notification Queue
        notificationQueue = conf.get("s3mper.notification.sqs.queue", notificationQueue);
        notificationQueueUrl = sqs.getQueueUrl(new GetQueueUrlRequest(notificationQueue)).getQueueUrl();
       
        //Disable reporting  (Testing purposes mostly)
        reportingDisabled = conf.getBoolean("s3mper.reporting.disabled", reportingDisabled);
    }
   
    /**
     * Sends an alert detailing that the given paths were missing from a list
     * operation.
     *
     * @param missingPaths
     */
    @Override
    public void alert(List<Path> missingPaths) {
        lazyInit();
       
        if(reportingDisabled) {
            return;
        }
       
        sendCloudWatchConsistencyAlert();
        sendSQSConsistencyMessage(missingPaths, false);
    }

    private void sendCloudWatchConsistencyAlert() {
        MetricDatum datum = new MetricDatum();
        datum.setMetricName(cloudWatchConsistencyMetric);
        datum.setUnit(StandardUnit.Count);
        datum.setValue(1.0);
       
        PutMetricDataRequest request = new PutMetricDataRequest();
        request.setNamespace(namespace);
        request.setMetricData(Collections.singleton(datum));
       
        cloudWatch.putMetricData(request);
    }
    
    /**
     * Sends a message that a listing was initially inconsistent but was
     * recovered by delaying/retrying.
     *
     * @param paths
     */
    @Override
    public void recovered(List<Path> paths) {
        lazyInit();
       
        if(reportingDisabled) {
            return;
        }
       
        sendSQSConsistencyMessage(paths, true);
    }  
   
    private void sendSQSConsistencyMessage(List<Path> paths, boolean recovered) {
        S3ConsistencyMessage message = new S3ConsistencyMessage();
       
        buildMessage(message);
       
        List<String> pathStrings = new ArrayList<String>();
        boolean truncated = false;
       
        for(Path p : paths) {
            pathStrings.add(p.toUri().toString());
           
            //Truncate if the message payload gets to be too large (i.e. to many missing files)
            if(pathStrings.size() >= pathReportLimit) {
                truncated = true;
                break;
            }
        }
       
        message.setPaths(pathStrings);
        message.setTruncated(truncated);
       
        int missingFiles = paths.size();
       
        if(recovered) {
            missingFiles = 0;
        }
       
        message.setMissingFiles(missingFiles);
        message.setRecovered(recovered);
       
        sendMessage(consistencyQueueUrl, message);
       
        if(!recovered) {
            sendMessage(notificationQueueUrl, message);
        }
    }

    /**
     * Sends a message to the timeout queue indicating that a dynamodb operation
     * timedout.
     *
     * @param operation
     * @param paths
     */
    @Override
    public void timeout(String operation, List<Path> paths) {
        lazyInit();
       
        if(reportingDisabled) {
            return;
        }
       
        //TODO: Being over-protective about these timeout messages.
        try {
            sendCloudWatchTimeoutAlert();
        } catch (Exception e) {
            log.error("Failed to send cloud watch timeout alert.", e);
        }
       
        try {
            sendSQSTimeoutMessage(operation);
        } catch (Exception e) {
            log.error("Filed to send SQS timeout message.", e);
        }
    }
  
    private void sendCloudWatchTimeoutAlert() {
        MetricDatum datum = new MetricDatum();
        datum.setMetricName(cloudWatchTimeoutMetric);
        datum.setUnit(StandardUnit.Count);
        datum.setValue(1.0);
       
        PutMetricDataRequest request = new PutMetricDataRequest();
        request.setNamespace(namespace);
        request.setMetricData(Collections.singleton(datum));
       
        cloudWatch.putMetricData(request);
    }
   
    private void sendSQSTimeoutMessage(String operation) {
        S3mperTimeoutMessage message = new S3mperTimeoutMessage();
        buildMessage(message);
       
        message.setOperation(operation);
       
        sendMessage(timeoutQueueUrl, message);
    }
   
    private void buildMessage(AbstractMessage message) {
        String hostname = "unknown";
       
        try {
            hostname = InetAddress.getLocalHost().getHostName();
        } catch (UnknownHostException e) {
            log.warn("Failed to identify hostname",e);
        }
   
        message.setEpoch(System.currentTimeMillis());
        message.setTimestamp(new Date(message.getEpoch()).toString());
        message.setHostname(hostname);
       
        String username = conf.get("user.name", System.getProperty("user.name"));
       
        try {
            username = UserGroupInformation.getCurrentUser().getUserName();   
        } catch (IOException e) {
            log.warn("Failed to identify user using hadoop library.", e);
        }
       
        message.setUsername(username);
       
        message.setGenieId(conf.get("genie.job.id"));
        message.setDataovenId(conf.get("dataoven.job.id"));
        String queryId = conf.get("hive.query.id");
       
        QueryType queryType = QueryType.Unknown;
       
        if(queryId != null) {
            queryType = QueryType.Hive;
            message.setLogFile(conf.get("hive.log.file"));
        } else {
            queryId = conf.get("pig.script.id");
               
            if(queryId != null) {
                queryType = QueryType.Pig;
                message.setLogFile(conf.get("pig.logfile"));
            }
        }
       
        message.setQueryId(queryId);
        message.setQueryType(queryType);
       
        message.setJobId(conf.get("mapred.job.id"));
        message.setTaskId(conf.get("mapred.tip.id"));
        message.setAttemptId(conf.get("mapred.task.id"));
        message.setInputFile(conf.get("mapred.input.file"));
        message.setEmail(conf.get("s3mper.email"));
       
        try {
            //We have to guess at this since it may not be explicitly in the config
            if(message.getJobId() == null) {
                String[] split = conf.get("mapreduce.job.dir").split("/");
                String jobId = split[split.length - 1];

                message.setJobId(jobId);
            }
        } catch (RuntimeException e) {
            log.debug("Failed to determine job id");
        }
       
        try {
            StackTraceElement[] stack = Thread.currentThread().getStackTrace();
           
            List<String> stackTrace = new ArrayList<String>(traceDepth);
           
            for (int i = 0; i < traceDepth && i < stack.length; i++) {
                stackTrace.add(stack[i].toString());
            }
           
            message.setStackTrace(stackTrace);
        } catch (Exception e) {
            log.debug("Stacktrace generation failed", e);
        }
       
    }

    private void sendMessage(String url, AbstractMessage message) {
        SendMessageRequest sqsRequest = null;
       
        try {
            String payload = new ObjectMapper().writeValueAsString(message);
       
            if(log.isDebugEnabled()) {
                log.debug("Sending SQS: " + payload);
            }
           
            sqsRequest = new SendMessageRequest(url, payload);
        } catch (IOException e) {
            log.error("Failed to map json object.", e);
        }
       
        sqs.sendMessage(sqsRequest);
    }
   
    @Override
    public void setConfig(Configuration conf) {
        this.conf = conf;
    }

    public void setUri(URI uri) {
        this.uri = uri;
    }
   
}
TOP

Related Classes of com.netflix.bdp.s3mper.alert.impl.CloudWatchAlertDispatcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.