Package org.elasticsearch.river.twitter

Source Code of org.elasticsearch.river.twitter.TwitterRiver$StatusHandler

/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.river.twitter;

import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.action.bulk.BulkRequestBuilder;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.indices.IndexAlreadyExistsException;
import org.elasticsearch.river.AbstractRiverComponent;
import org.elasticsearch.river.River;
import org.elasticsearch.river.RiverName;
import org.elasticsearch.river.RiverSettings;
import org.elasticsearch.threadpool.ThreadPool;
import twitter4j.*;
import twitter4j.conf.ConfigurationBuilder;

import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

/**
* @author kimchy (shay.banon)
*/
public class TwitterRiver extends AbstractRiverComponent implements River {

    private final ThreadPool threadPool;

    private final Client client;

    private String user;
    private String password;

    private String oauthConsumerKey = null;
    private String oauthConsumerSecret = null;
    private String oauthAccessToken = null;
    private String oauthAccessTokenSecret = null;

    private final String indexName;

    private final String typeName;

    private final int bulkSize;

    private final int dropThreshold;

    private FilterQuery filterQuery;

    private String streamType;


    private volatile TwitterStream stream;

    private final AtomicInteger onGoingBulks = new AtomicInteger();

    private volatile BulkRequestBuilder currentRequest;

    private volatile boolean closed = false;

    @SuppressWarnings({"unchecked"})
    @Inject public TwitterRiver(RiverName riverName, RiverSettings settings, Client client, ThreadPool threadPool) {
        super(riverName, settings);
        this.client = client;
        this.threadPool = threadPool;

        if (settings.settings().containsKey("twitter")) {
            Map<String, Object> twitterSettings = (Map<String, Object>) settings.settings().get("twitter");
            user = XContentMapValues.nodeStringValue(twitterSettings.get("user"), null);
            password = XContentMapValues.nodeStringValue(twitterSettings.get("password"), null);
            if (twitterSettings.containsKey("oauth")) {
                Map<String, Object> oauth = (Map<String, Object>) twitterSettings.get("oauth");
                if (oauth.containsKey("consumerKey")) {
                    oauthConsumerKey = XContentMapValues.nodeStringValue(oauth.get("consumerKey"), null);
                }
                if (oauth.containsKey("consumer_key")) {
                    oauthConsumerKey = XContentMapValues.nodeStringValue(oauth.get("consumer_key"), null);
                }
                if (oauth.containsKey("consumerSecret")) {
                    oauthConsumerSecret = XContentMapValues.nodeStringValue(oauth.get("consumerSecret"), null);
                }
                if (oauth.containsKey("consumer_secret")) {
                    oauthConsumerSecret = XContentMapValues.nodeStringValue(oauth.get("consumer_secret"), null);
                }
                if (oauth.containsKey("accessToken")) {
                    oauthAccessToken = XContentMapValues.nodeStringValue(oauth.get("accessToken"), null);
                }
                if (oauth.containsKey("access_token")) {
                    oauthAccessToken = XContentMapValues.nodeStringValue(oauth.get("access_token"), null);
                }
                if (oauth.containsKey("accessTokenSecret")) {
                    oauthAccessTokenSecret = XContentMapValues.nodeStringValue(oauth.get("accessTokenSecret"), null);
                }
                if (oauth.containsKey("access_token_secret")) {
                    oauthAccessTokenSecret = XContentMapValues.nodeStringValue(oauth.get("access_token_secret"), null);
                }
            }
            streamType = XContentMapValues.nodeStringValue(twitterSettings.get("type"), "sample");
            Map<String, Object> filterSettings = (Map<String, Object>) twitterSettings.get("filter");
            if (filterSettings != null) {
                filterQuery = new FilterQuery();
                filterQuery.count(XContentMapValues.nodeIntegerValue(filterSettings.get("count"), 0));
                Object tracks = filterSettings.get("tracks");
                if (tracks != null) {
                    if (tracks instanceof List) {
                        List<String> lTracks = (List<String>) tracks;
                        filterQuery.track(lTracks.toArray(new String[lTracks.size()]));
                    } else {
                        filterQuery.track(Strings.commaDelimitedListToStringArray(tracks.toString()));
                    }
                }
                Object follow = filterSettings.get("follow");
                if (follow != null) {
                    if (follow instanceof List) {
                        List lFollow = (List) follow;
                        int[] followIds = new int[lFollow.size()];
                        for (int i = 0; i < lFollow.size(); i++) {
                            Object o = lFollow.get(i);
                            if (o instanceof Number) {
                                followIds[i] = ((Number) o).intValue();
                            } else {
                                followIds[i] = Integer.parseInt(o.toString());
                            }
                        }
                        filterQuery.follow(followIds);
                    } else {
                        String[] ids = Strings.commaDelimitedListToStringArray(follow.toString());
                        int[] followIds = new int[ids.length];
                        for (int i = 0; i < ids.length; i++) {
                            followIds[i] = Integer.parseInt(ids[i]);
                        }
                        filterQuery.follow(followIds);
                    }
                }
                Object locations = filterSettings.get("locations");
                if (locations != null) {
                    if (locations instanceof List) {
                        List lLocations = (List) locations;
                        double[][] dLocations = new double[lLocations.size()][];
                        for (int i = 0; i < lLocations.size(); i++) {
                            Object loc = lLocations.get(i);
                            double lat;
                            double lon;
                            if (loc instanceof List) {
                                List lLoc = (List) loc;
                                if (lLoc.get(0) instanceof Number) {
                                    lat = ((Number) lLoc.get(0)).doubleValue();
                                } else {
                                    lat = Double.parseDouble(lLoc.get(0).toString());
                                }
                                if (lLoc.get(1) instanceof Number) {
                                    lon = ((Number) lLoc.get(1)).doubleValue();
                                } else {
                                    lon = Double.parseDouble(lLoc.get(1).toString());
                                }
                            } else {
                                String[] sLoc = Strings.commaDelimitedListToStringArray(loc.toString());
                                lat = Double.parseDouble(sLoc[0]);
                                lon = Double.parseDouble(sLoc[1]);
                            }
                            dLocations[i] = new double[]{lat, lon};
                        }
                        filterQuery.locations(dLocations);
                    } else {
                        String[] sLocations = Strings.commaDelimitedListToStringArray(locations.toString());
                        double[][] dLocations = new double[sLocations.length / 2][];
                        int dCounter = 0;
                        for (int i = 0; i < sLocations.length; i++) {
                            double lat = Double.parseDouble(sLocations[i]);
                            double lon = Double.parseDouble(sLocations[++i]);
                            dLocations[dCounter++] = new double[]{lat, lon};
                        }
                        filterQuery.locations(dLocations);
                    }
                }
            }
        }

        logger.info("creating twitter stream river for [{}]", user);

        if (user == null && password == null && oauthAccessToken == null && oauthConsumerKey == null && oauthConsumerSecret == null && oauthAccessTokenSecret == null) {
            stream = null;
            indexName = null;
            typeName = "status";
            bulkSize = 100;
            dropThreshold = 10;
            logger.warn("no user/password or oauth specified, disabling river...");
            return;
        }

        if (settings.settings().containsKey("index")) {
            Map<String, Object> indexSettings = (Map<String, Object>) settings.settings().get("index");
            indexName = XContentMapValues.nodeStringValue(indexSettings.get("index"), riverName.name());
            typeName = XContentMapValues.nodeStringValue(indexSettings.get("type"), "status");
            this.bulkSize = XContentMapValues.nodeIntegerValue(settings.settings().get("bulk_size"), 100);
            this.dropThreshold = XContentMapValues.nodeIntegerValue(settings.settings().get("drop_threshold"), 10);
        } else {
            indexName = riverName.name();
            typeName = "status";
            bulkSize = 100;
            dropThreshold = 10;
        }

        ConfigurationBuilder cb = new ConfigurationBuilder();
        if (oauthAccessToken != null && oauthConsumerKey != null && oauthConsumerSecret != null && oauthAccessTokenSecret != null) {
            cb.setOAuthConsumerKey(oauthConsumerKey)
                    .setOAuthConsumerSecret(oauthConsumerSecret)
                    .setOAuthAccessToken(oauthAccessToken)
                    .setOAuthAccessTokenSecret(oauthAccessTokenSecret);
        } else {
            cb.setUser(user).setPassword(password);
        }
        stream = new TwitterStreamFactory(cb.build()).getInstance();
        stream.addListener(new StatusHandler());
    }

    @Override public void start() {
        if (stream == null) {
            return;
        }
        logger.info("starting twitter stream");
        try {
            String mapping = XContentFactory.jsonBuilder().startObject().startObject(typeName).startObject("properties")
                    .startObject("location").field("type", "geo_point").endObject()
                    .startObject("user").startObject("properties").startObject("screen_name").field("type", "string").field("index", "not_analyzed").endObject().endObject().endObject()
                    .startObject("mention").startObject("properties").startObject("screen_name").field("type", "string").field("index", "not_analyzed").endObject().endObject().endObject()
                    .startObject("in_reply").startObject("properties").startObject("user_screen_name").field("type", "string").field("index", "not_analyzed").endObject().endObject().endObject()
                    .endObject().endObject().endObject().string();
            client.admin().indices().prepareCreate(indexName).addMapping(typeName, mapping).execute().actionGet();
        } catch (Exception e) {
            if (ExceptionsHelper.unwrapCause(e) instanceof IndexAlreadyExistsException) {
                // that's fine
            } else if (ExceptionsHelper.unwrapCause(e) instanceof ClusterBlockException) {
                // ok, not recovered yet..., lets start indexing and hope we recover by the first bulk
                // TODO: a smarter logic can be to register for cluster event listener here, and only start sampling when the block is removed...
            } else {
                logger.warn("failed to create index [{}], disabling river...", e, indexName);
                return;
            }
        }
        currentRequest = client.prepareBulk();
        if (streamType.equals("filter") || filterQuery != null) {
            try {
                stream.filter(filterQuery);
            } catch (TwitterException e) {
                logger.warn("failed to create filter stream based on query, disabling river....");
            }
        } else if (streamType.equals("firehose")) {
            stream.firehose(0);
        } else {
            stream.sample();
        }
    }

    private void reconnect() {
        if (closed) {
            return;
        }
        try {
            stream.cleanUp();
        } catch (Exception e) {
            logger.debug("failed to cleanup after failure", e);
        }
        try {
            stream.shutdown();
        } catch (Exception e) {
            logger.debug("failed to shutdown after failure", e);
        }
        if (closed) {
            return;
        }

        try {
            ConfigurationBuilder cb = new ConfigurationBuilder();
            if (oauthAccessToken != null && oauthConsumerKey != null && oauthConsumerSecret != null && oauthAccessTokenSecret != null) {
                cb.setOAuthConsumerKey(oauthConsumerKey)
                        .setOAuthConsumerSecret(oauthConsumerSecret)
                        .setOAuthAccessToken(oauthAccessToken)
                        .setOAuthAccessTokenSecret(oauthAccessTokenSecret);
            } else {
                cb.setUser(user).setPassword(password);
            }
            stream = new TwitterStreamFactory(cb.build()).getInstance();
            stream.addListener(new StatusHandler());

            if (streamType.equals("filter") || filterQuery != null) {
                try {
                    stream.filter(filterQuery);
                } catch (TwitterException e) {
                    logger.warn("failed to create filter stream based on query, disabling river....");
                }
            } else if (streamType.equals("firehose")) {
                stream.firehose(0);
            } else {
                stream.sample();
            }
        } catch (Exception e) {
            if (closed) {
                close();
                return;
            }
            // TODO, we can update the status of the river to RECONNECT
            logger.warn("failed to connect after failure, throttling", e);
            threadPool.schedule(TimeValue.timeValueSeconds(10), ThreadPool.Names.CACHED, new Runnable() {
                @Override public void run() {
                    reconnect();
                }
            });
        }
    }

    @Override public void close() {
        this.closed = true;
        logger.info("closing twitter stream river");
        if (stream != null) {
            stream.cleanUp();
            stream.shutdown();
        }
    }

    private class StatusHandler extends StatusAdapter {

        @Override public void onStatus(Status status) {
            if (logger.isTraceEnabled()) {
                logger.trace("status {} : {}", status.getUser().getName(), status.getText());
            }
            try {
                XContentBuilder builder = XContentFactory.jsonBuilder().startObject();
                builder.field("text", status.getText());
                builder.field("created_at", status.getCreatedAt());
                builder.field("source", status.getSource());
                builder.field("truncated", status.isTruncated());

                if (status.getUserMentionEntities() != null) {
                    builder.startArray("mention");
                    for (UserMentionEntity user : status.getUserMentionEntities()) {
                        builder.startObject();
                        builder.field("id", user.getId());
                        builder.field("name", user.getName());
                        builder.field("screen_name", user.getScreenName());
                        builder.field("start", user.getStart());
                        builder.field("end", user.getEnd());
                        builder.endObject();
                    }
                    builder.endArray();
                }

                if (status.getRetweetCount() != -1) {
                    builder.field("retweet_count", status.getRetweetCount());
                }

                if (status.getInReplyToStatusId() != -1) {
                    builder.startObject("in_reply");
                    builder.field("status", status.getInReplyToStatusId());
                    if (status.getInReplyToUserId() != -1) {
                        builder.field("user_id", status.getInReplyToUserId());
                        builder.field("user_screen_name", status.getInReplyToScreenName());
                    }
                    builder.endObject();
                }

                if (status.getHashtagEntities() != null) {
                    builder.startArray("hashtag");
                    for (HashtagEntity hashtag : status.getHashtagEntities()) {
                        builder.startObject();
                        builder.field("text", hashtag.getText());
                        builder.field("start", hashtag.getStart());
                        builder.field("end", hashtag.getEnd());
                        builder.endObject();
                    }
                    builder.endArray();
                }
                if (status.getContributors() != null) {
                    builder.array("contributor", status.getContributors());
                }
                if (status.getGeoLocation() != null) {
                    builder.startObject("location");
                    builder.field("lat", status.getGeoLocation().getLatitude());
                    builder.field("lon", status.getGeoLocation().getLongitude());
                    builder.endObject();
                }
                if (status.getPlace() != null) {
                    builder.startObject("place");
                    builder.field("id", status.getPlace().getId());
                    builder.field("name", status.getPlace().getName());
                    builder.field("type", status.getPlace().getPlaceType());
                    builder.field("full_name", status.getPlace().getFullName());
                    builder.field("street_address", status.getPlace().getStreetAddress());
                    builder.field("country", status.getPlace().getCountry());
                    builder.field("country_code", status.getPlace().getCountryCode());
                    builder.field("url", status.getPlace().getURL());
                    builder.endObject();
                }
                if (status.getURLEntities() != null) {
                    builder.startArray("link");
                    for (URLEntity url : status.getURLEntities()) {
                        if (url != null) {
                            builder.startObject();
                            builder.field("url", url.getURL().toExternalForm());
                            if (url.getDisplayURL() != null) {
                                builder.field("display_url", url.getDisplayURL());
                            }
                            if (url.getExpandedURL() != null) {
                                builder.field("expand_url", url.getExpandedURL());
                            }
                            builder.field("start", url.getStart());
                            builder.field("end", url.getEnd());
                            builder.endObject();
                        }
                    }
                    builder.endArray();
                }
                if (status.getAnnotations() != null) {
                    builder.startObject("annotation");
                    List<Annotation> annotations = status.getAnnotations().getAnnotations();
                    for (Annotation ann : annotations) {
                        builder.startObject(ann.getType());
                        Map<String, String> attributes = ann.getAttributes();
                        for (Map.Entry<String, String> entry : attributes.entrySet()) {
                            builder.field(entry.getKey(), entry.getValue());
                        }
                        builder.endObject();
                    }
                    builder.endObject();
                }

                builder.startObject("user");
                builder.field("id", status.getUser().getId());
                builder.field("name", status.getUser().getName());
                builder.field("screen_name", status.getUser().getScreenName());
                builder.field("location", status.getUser().getLocation());
                builder.field("description", status.getUser().getDescription());
                builder.endObject();

                builder.endObject();
                currentRequest.add(Requests.indexRequest(indexName).type(typeName).id(Long.toString(status.getId())).create(true).source(builder));
                processBulkIfNeeded();
            } catch (Exception e) {
                logger.warn("failed to construct index request", e);
            }
        }

        @Override public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            if (statusDeletionNotice.getStatusId() != -1) {
                currentRequest.add(Requests.deleteRequest(indexName).type(typeName).id(Long.toString(statusDeletionNotice.getStatusId())));
                processBulkIfNeeded();
            }
        }

        @Override public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            logger.info("received track limitation notice, number_of_limited_statuses {}", numberOfLimitedStatuses);
        }

        @Override public void onException(Exception ex) {
            logger.warn("stream failure, restarting stream...", ex);
            threadPool.cached().execute(new Runnable() {
                @Override public void run() {
                    reconnect();
                }
            });
        }

        private void processBulkIfNeeded() {
            if (currentRequest.numberOfActions() >= bulkSize) {
                // execute the bulk operation
                int currentOnGoingBulks = onGoingBulks.incrementAndGet();
                if (currentOnGoingBulks > dropThreshold) {
                    onGoingBulks.decrementAndGet();
                    logger.warn("dropping bulk, [{}] crossed threshold [{}]", onGoingBulks, dropThreshold);
                } else {
                    try {
                        currentRequest.execute(new ActionListener<BulkResponse>() {
                            @Override public void onResponse(BulkResponse bulkResponse) {
                                onGoingBulks.decrementAndGet();
                            }

                            @Override public void onFailure(Throwable e) {
                                logger.warn("failed to execute bulk");
                            }
                        });
                    } catch (Exception e) {
                        logger.warn("failed to process bulk", e);
                    }
                }
                currentRequest = client.prepareBulk();
            }
        }
    }
}
TOP

Related Classes of org.elasticsearch.river.twitter.TwitterRiver$StatusHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.