Package de.jetwick.tw

Source Code of de.jetwick.tw.TweetConsumer$QueueInfo

/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package de.jetwick.tw;

import com.google.inject.Inject;
import de.jetwick.data.JTweet;
import de.jetwick.data.UrlEntry;
import de.jetwick.snacktory.JResult;
import de.jetwick.util.GenericUrlResolver;
import de.jetwick.util.StopWatch;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import org.elasticsearch.common.cache.CacheBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* stores the tweets from the queue into the dbHelper and solr
*
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/
public class TweetConsumer extends Thread {

    private final Logger logger = LoggerFactory.getLogger(getClass());
    private List<QueueInfo<JTweet>> inputQueues = new ArrayList<QueueInfo<JTweet>>();
    @Inject
    protected GenericUrlResolver resolver;
    private Map<Long, Object> tweetCache;
    private static final Object OBJECT = new Object();
    private UrlExtractor urlExtractor;

    public TweetConsumer() {
        super("tweet-consumer");
    }

    public GenericUrlResolver getResolver() {
        return resolver;
    }

    @Override
    public void run() {
        initTweetCache();
        urlExtractor = new UrlExtractor() {

            @Override
            public JResult getInfo(String originalUrl, int timeout) throws Exception {
                JResult res = UrlEntry.createSimpleResult(originalUrl);
                return res;
            }
        };

        int counter = 0;
        StopWatch sw = new StopWatch();
        while (true) {
            counter++;
            sw.start();
            int feeded = executeOneBatch();
            sw.stop();
            if (feeded < 10) {
                try {
                    Thread.sleep(400);
                } catch (InterruptedException ex) {
                    logger.error(getName() + " interrupted while sleeping: " + ex.getLocalizedMessage());
                    break;
                }
            }

            // print stats
            if (counter % 1000 == 0) {
                logger.info("time of polling:\t" + sw.getSeconds());
                sw = new StopWatch();

                logger.info("tweetCache size:\t" + tweetCache.size());
                logger.info("tweetTodo size:\t" + resolver.getInputQueue().size());
                for (QueueInfo<JTweet> qi : inputQueues) {
                    logger.info(qi.toString());
                }
            }
        }
        logger.warn(getName() + " finished");
    }

    public void setResolver(GenericUrlResolver resolver) {
        this.resolver = resolver;
    }

    /**
     * @param queueName the identifier of the input queue
     * @param capacity the number of elements which should fit into the input
     * queue. This should be at least twice times bigger than batchSize.
     * @param batchSize the number of elements to feed at once into main output
     * queue.
     * @return the newly registered queue
     */
    public BlockingQueue<JTweet> register(String queueName, int capacity, int batchSize) {
        BlockingQueue q = new LinkedBlockingQueue<JTweet>(capacity);
        QueueInfo qInfo = new QueueInfo(queueName, q);
        for (QueueInfo<JTweet> qi : inputQueues) {
            if (qi.getName().equals(queueName))
                throw new IllegalStateException("cannot register queue. Queue " + queueName + " already exists");
        }

        qInfo.setBatchSize(batchSize);
        inputQueues.add(qInfo);

        int sum = 0;
        for (QueueInfo<JTweet> qi : inputQueues) {
            sum += qi.getBatchSize();
        }

        int mainCapacity = resolver.getInputQueue().remainingCapacity() + resolver.getInputQueue().size();
        if (sum * 2 > mainCapacity)
            throw new IllegalStateException("cannot register queue " + queueName + " because it"
                    + " would increas capacity of all input queues too much (" + sum + ") and "
                    + " can block main queue too often, where the capacity is only:" + mainCapacity);
        return qInfo.getQueue();
    }

    public int executeOneBatch() {
        int feeded = 0;
        for (QueueInfo<JTweet> qi : inputQueues) {
            int batchSize = qi.getBatchSize();
            Queue<JTweet> queue = qi.getQueue();
            int newTweets = 0;
            for (; newTweets < batchSize; newTweets++) {
                JTweet tw = queue.poll();
                if (tw == null)
                    break;

                if (!tw.isPersistent() && tweetCache != null && tweetCache.put(tw.getTwitterId(), OBJECT) != null) {
                    newTweets--;
                    continue;
                }

                if (urlExtractor != null) {
                    for (UrlEntry ue : ((UrlExtractor) urlExtractor.setTweet(tw).run()).getUrlEntries()) {
                        tw.addUrlEntry(ue);
                    }
                }
                feeded++;
                resolver.queueObject(tw);
            }

        }
        return feeded;
    }

    public void initTweetCache() {
        if (tweetCache == null)
            tweetCache = GenericUrlResolver.createGenericCache(50000, 6 * 60);
    }

    public static class QueueInfo<JTweet> {

        private final String name;
        private long lastMeasureTime = System.currentTimeMillis();
        private final BlockingQueue<JTweet> queue;
        private int batchSize = 200;
        private int outputCount;
        private float outputFrequency;

        public QueueInfo(String name, BlockingQueue<JTweet> queue) {
            this.name = name;
            this.queue = queue;
        }

        public BlockingQueue<JTweet> getQueue() {
            return queue;
        }

        public String getName() {
            return name;
        }

        public int getBatchSize() {
            return batchSize;
        }

        public void setBatchSize(int batchSize) {
            this.batchSize = batchSize;
        }

        public void setOutputFrequency(float outputFrequency) {
            this.outputFrequency = outputFrequency;
        }

        public float getOutputFrequency() {
            return outputFrequency;
        }

        public void setLastMeasureTime(long lastMeasureTime) {
            this.lastMeasureTime = lastMeasureTime;
        }

        public long getLastMeasureTime() {
            return lastMeasureTime;
        }

        public int getOutputCount() {
            return outputCount;
        }

        public void setOutputCount(int outputCount) {
            this.outputCount = outputCount;
        }

        @Override
        public String toString() {
            return getName() + "\t size:" + getQueue().size() + "\t count:" + outputCount + "\t oFreq.:" + getOutputFrequency();
        }
    }
}
TOP

Related Classes of de.jetwick.tw.TweetConsumer$QueueInfo

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.