Package org.eurekastreams.server.action.execution.feed

Source Code of org.eurekastreams.server.action.execution.feed.RefreshFeedExecution

/*
* Copyright (c) 2010 Lockheed Martin Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.eurekastreams.server.action.execution.feed;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.eurekastreams.commons.actions.ExecutionStrategy;
import org.eurekastreams.commons.actions.context.ActionContext;
import org.eurekastreams.commons.exceptions.ExecutionException;
import org.eurekastreams.commons.logging.LogFactory;
import org.eurekastreams.server.action.request.feed.RefreshFeedRequest;
import org.eurekastreams.server.domain.DomainGroup;
import org.eurekastreams.server.domain.EntityType;
import org.eurekastreams.server.domain.GeneralGadgetDefinition;
import org.eurekastreams.server.domain.Person;
import org.eurekastreams.server.domain.gadgetspec.GadgetMetaDataDTO;
import org.eurekastreams.server.domain.stream.Activity;
import org.eurekastreams.server.domain.stream.ActivityVerb;
import org.eurekastreams.server.domain.stream.BaseObjectType;
import org.eurekastreams.server.domain.stream.plugins.Feed;
import org.eurekastreams.server.domain.stream.plugins.FeedSubscriber;
import org.eurekastreams.server.persistence.mappers.FindByIdMapper;
import org.eurekastreams.server.persistence.mappers.InsertMapper;
import org.eurekastreams.server.persistence.mappers.UpdateMapper;
import org.eurekastreams.server.persistence.mappers.cache.Cache;
import org.eurekastreams.server.persistence.mappers.cache.CacheKeys;
import org.eurekastreams.server.persistence.mappers.requests.FindByIdRequest;
import org.eurekastreams.server.persistence.mappers.requests.PersistenceRequest;
import org.eurekastreams.server.service.actions.strategies.activity.plugins.FeedObjectActivityBuilder;
import org.eurekastreams.server.service.actions.strategies.activity.plugins.ObjectBuilderForSpecificUrl;
import org.eurekastreams.server.service.actions.strategies.activity.plugins.rome.ActivityStreamsModule;
import org.eurekastreams.server.service.actions.strategies.activity.plugins.rome.FeedFactory;
import org.eurekastreams.server.service.opensocial.gadgets.spec.GadgetMetaDataFetcher;

import com.sun.syndication.feed.module.SyModule;
import com.sun.syndication.feed.synd.SyndEntryImpl;
import com.sun.syndication.feed.synd.SyndFeed;

/**
* Goes out to the interwebs, grabs a feed, parses it, stores it in DB and queues it up to be stored in cache. The
* reason for the queueing is so I don't update the same exact list 100s of times instead of once. This cuts down by
* multiple orders of magnitude
*
*/
public class RefreshFeedExecution implements ExecutionStrategy<ActionContext>
{
    /**
     * The number of milliseconds in a minute.
     */
    private static final int MS_IN_MIN = 60000;
    /**
     * The number of minutes in an hour.
     */
    private static final int MINS_IN_HOUR = 60;
    /**
     * The number of hours in a day.
     */
    private static final int HOURS_IN_DAY = 24;
    /**
     * The number of days in a week.
     */
    private static final int DAYS_IN_WEEK = 7;
    /**
     * The number of days in a month.
     */
    private static final int DAYS_IN_MONTH = 31;
    /**
     * Tne number of days in a year.
     */
    private static final int DAYS_IN_YEAR = 365;

    /**
     * Logger.
     */
    private final Log log = LogFactory.make();

    /**
     * Standard feed mappers.
     */
    private final HashMap<BaseObjectType, FeedObjectActivityBuilder> standardFeedMappers;

    /**
     * Mappers for specific websites.
     */
    private final List<ObjectBuilderForSpecificUrl> specificUrlMappers;

    /**
     * Bulk insert activity into the DB.
     */
    private final InsertMapper<Activity> activityDBInserter;

    /**
     * The cache.
     */
    private final Cache cache;

    /**
     * Feed fetcher factory, really only needed for testing.
     */
    private final FeedFactory feedFetcherFactory;

    /**
     * Person finder.
     */
    private final FindByIdMapper<Person> personFinder;
    /**
     * Group finder.
     */
    private final FindByIdMapper<DomainGroup> groupFinder;

    /**
     * Feed finder.
     */
    private final FindByIdMapper<Feed> feedFinder;

    /**
     * Meta data fetcher.
     */
    private GadgetMetaDataFetcher metaDataFetcher = null;

    /**
     * Update mapper.
     */
    private UpdateMapper<Feed> updateFeedMapper = null;

    /**
     * Out of order feed list.
     */
    private List<String> outOfOrderFeeds;

    /**
     * Default constructor.
     *
     * @param inStandardFeedMappers
     *            Standard feed mappers.
     * @param inSpecificUrlMappers
     *            Mappers for specific websites.
     * @param inActivityDBInserter
     *            Insert activity into the DB.
     * @param inCache
     *            The cache.
     * @param inFeedFetcherFactory
     *            feed fetcher factory.
     * @param inPersonFinder
     *            find person.
     * @param inGroupFinder
     *            find group.
     * @param inFeedFinder
     *            find feed.
     * @param inMetaDataFetcher
     *            fetcher.
     * @param inUpdateFeedMapper
     *            updateMapper.
     * @param inOutOfOrderFeeds
     *            known out of order feeds.
     */
    public RefreshFeedExecution(final HashMap<BaseObjectType, FeedObjectActivityBuilder> inStandardFeedMappers,
            final List<ObjectBuilderForSpecificUrl> inSpecificUrlMappers,
            final InsertMapper<Activity> inActivityDBInserter, final Cache inCache,
            final FeedFactory inFeedFetcherFactory, final FindByIdMapper<Person> inPersonFinder,
            final FindByIdMapper<DomainGroup> inGroupFinder, final FindByIdMapper<Feed> inFeedFinder,
            final GadgetMetaDataFetcher inMetaDataFetcher, final UpdateMapper<Feed> inUpdateFeedMapper,
            final List<String> inOutOfOrderFeeds)
    {
        standardFeedMappers = inStandardFeedMappers;
        specificUrlMappers = inSpecificUrlMappers;
        activityDBInserter = inActivityDBInserter;
        cache = inCache;
        feedFetcherFactory = inFeedFetcherFactory;
        personFinder = inPersonFinder;
        groupFinder = inGroupFinder;
        feedFinder = inFeedFinder;
        metaDataFetcher = inMetaDataFetcher;
        updateFeedMapper = inUpdateFeedMapper;
        outOfOrderFeeds = inOutOfOrderFeeds;
    }

    /**
     * {@inheritDoc}.
     *
     * Grab all the feeds, set them as pending, and fire off an async job. to refresh each one.
     */
    @Override
    public Serializable execute(final ActionContext inActionContext) throws ExecutionException
    {
        Boolean brokenFeed = true;
        String lastSeenGUID = "";
        RefreshFeedRequest request = (RefreshFeedRequest) inActionContext.getParams();
        Feed feed = feedFinder.execute(new FindByIdRequest("Feed", request.getFeedId()));
        Date lastPostDate = feed.getLastPostDate();
        Long updateFrequency = null;
        Boolean isOutOfOrder = false;

        log.info("Processor feed: " + feed.getUrl());
        for (String oooFeed : outOfOrderFeeds)
        {
            if (feed.getUrl().contains(oooFeed))
            {
                log.info("Feed marked out of order: " + feed.getUrl());
                isOutOfOrder = true;
                break;
            }
        }

        try
        {
            // fetch the feeds
            // Gives the fetcher the feed and a list of the requestors; the fetcher will decide if it can make a single
            // unauthenticated request or if it needs to make one request per requestor. A set is used to prevent
            // giving the fetcher any duplicates.
            Set<String> requestorAccounts = new HashSet<String>();
            for (FeedSubscriber feedSubscriber : feed.getFeedSubscribers())
            {
                requestorAccounts.add(feedSubscriber.getRequestor().getAccountId());
            }
            Map<String, SyndFeed> syndFeeds = feedFetcherFactory.getSyndicatedFeed(feed.getUrl(), requestorAccounts);

            FeedObjectActivityBuilder selectedObjectMapper = null;
            for (ObjectBuilderForSpecificUrl entry : specificUrlMappers)
            {
                if (entry.match(feed.getUrl()))
                {
                    selectedObjectMapper = entry.getBuilder();
                    break;
                }
            }

            // iterate through all feed instances returned by the fetcher
            List<Activity> insertedActivities = new LinkedList<Activity>();
            for (Map.Entry<String, SyndFeed> mapEntry : syndFeeds.entrySet())
            {
                SyndFeed syndFeed = mapEntry.getValue();
                List<FeedSubscriber> subscribers = getFeedSubscribers(mapEntry.getKey(), feed);

                // check for update frequency info
                if (updateFrequency == null)
                {
                    SyModule syMod = (SyModule) syndFeed.getModule(SyModule.URI);
                    if (syMod != null)
                    {
                        updateFrequency = getUpdateFrequency(syMod.getUpdatePeriod(), syMod.getUpdateFrequency());
                    }
                }

                if (syndFeed.getEntries().size() > 0)
                {
                    SyndEntryImpl entry = (SyndEntryImpl) syndFeed.getEntries().get(0);
                    lastSeenGUID = entry.getUri();
                }

                Boolean brokenOutOfOrder = false;

                if (isOutOfOrder && feed.getLastSeenGUID() != null)
                {
                    brokenOutOfOrder = true;
                    // iterate through each entry in the feed instance
                    for (Object entryObject : syndFeed.getEntries())
                    {
                        try
                        {
                            SyndEntryImpl entry = (SyndEntryImpl) entryObject;
                            if (feed.getLastSeenGUID().equals(entry.getUri()))
                            {
                                log.info("Found  matching GUID in out of order feed: " + lastSeenGUID);
                                brokenOutOfOrder = false;
                                break;
                            }
                        }
                        catch (Exception ex)
                        {
                            log.warn("ATOM/RSS entry is not to spec. "
                                    + "Skipping entry and moving to the next one. Feed url: " + feed.getUrl(), ex);
                        }
                    }
                }

                if (!brokenOutOfOrder)
                {
                    // iterate through each entry in the feed instance
                    for (Object entryObject : syndFeed.getEntries())
                    {
                        try
                        {
                            SyndEntryImpl entry = (SyndEntryImpl) entryObject;

                            if (lastPostDate == null || entry.getPublishedDate().after(lastPostDate))
                            {
                                lastPostDate = entry.getPublishedDate();
                            }

                            Activity activity = getActivityFromATOMEntry(feed, entry, selectedObjectMapper);
                            // We were able to parse at least one good entry to completion, so the feed isn't broken.
                            brokenFeed = false;

                            if (isOutOfOrder && feed.getLastSeenGUID().equals(entry.getUri()))
                            {
                                log.info("Match found based on GUID: " + lastSeenGUID);
                                break;
                            }
                            else
                            {
                                log.info("No match found based on GUID: " + entry.getUri());
                            }

                            if (!isOutOfOrder && !entry.getPublishedDate().after(feed.getLastPostDate()))
                            {
                                log.info("Match found based on Date: " + feed.getLastPostDate());
                                break;
                            }
                            else
                            {
                                log.info("No match found based on Date: " + entry.getPublishedDate()
                                        + " Last Post Date: " + feed.getLastPostDate());
                            }

                            // create activities per subscriber
                            for (FeedSubscriber feedSubscriber : subscribers)
                            {
                                Activity activityForIndividual = (Activity) activity.clone();

                                if (feedSubscriber.getEntityType().equals(EntityType.PERSON))
                                {
                                    Person person = personFinder.execute(new FindByIdRequest("Person", feedSubscriber
                                            .getEntityId()));

                                    if (person.isAccountLocked())
                                    {
                                        log.info("Ignoring locked account: " + person.getAccountId());
                                    }
                                    else
                                    {
                                        activityForIndividual.setActorId(person.getAccountId());
                                        activityForIndividual.setRecipientStreamScope(person.getStreamScope());
                                        activityForIndividual.setIsDestinationStreamPublic(true);

                                        activityForIndividual.setActorType(feedSubscriber.getEntityType());
                                        insertedActivities.add(activityForIndividual);
                                    }
                                }
                                else if (feedSubscriber.getEntityType().equals(EntityType.GROUP))
                                {
                                    DomainGroup group = groupFinder.execute(new FindByIdRequest("DomainGroup",
                                            feedSubscriber.getEntityId()));

                                    activityForIndividual.setActorId(group.getShortName());
                                    activityForIndividual.setRecipientStreamScope(group.getStreamScope());
                                    activityForIndividual.setIsDestinationStreamPublic(group.isPublicGroup());

                                    activityForIndividual.setActorType(feedSubscriber.getEntityType());
                                    insertedActivities.add(activityForIndividual);
                                }
                            }

                        }
                        catch (Exception ex)
                        {
                            log.warn("ATOM/RSS entry is not to spec. "
                                    + "Skipping entry and moving to the next one. Feed url: " + feed.getUrl(), ex);
                        }
                    }
                }
            }

            // updateFeedMapper.execute(new PersistenceRequest<Feed>(feed));
            if (!insertedActivities.isEmpty())
            {
                ArrayList<Long> insertedActivityIds = new ArrayList<Long>();
                Collections.reverse(insertedActivities);
                for (Activity activity : insertedActivities)
                {
                    activityDBInserter.execute(new PersistenceRequest<Activity>(activity));
                    insertedActivityIds.add(activity.getId());
                }
                Collections.reverse(insertedActivityIds);

                // TODO: this is not performant; fix
                if (cache.get(CacheKeys.BUFFERED_ACTIVITIES) == null)
                {
                    cache.setList(CacheKeys.BUFFERED_ACTIVITIES, insertedActivityIds);
                }
                else
                {
                    cache.addToTopOfList(CacheKeys.BUFFERED_ACTIVITIES, insertedActivityIds);
                }
            }
        }
        catch (Exception ex)
        {
            log.error("Error retrieving feed: " + feed.getUrl(), ex);
        }
        finally
        {
            feed.setLastSeenGUID(lastSeenGUID);
            feed.setIsFeedBroken(brokenFeed);
            feed.setLastPostDate(lastPostDate);
            feed.setLastUpdated(new Date().getTime() / MS_IN_MIN);
            feed.setUpdateFrequency(updateFrequency);
            feed.setPending(false);
        }

        return null;
    }

    /**
     * Get the Activity object from an ATOM entry.
     *
     * @param feed
     *            the feed.
     * @param inEntry
     *            the entry.
     * @param inSelectedObjectMapper
     *            the mapper.
     * @return the activity.
     */
    private Activity getActivityFromATOMEntry(final Feed feed, final SyndEntryImpl inEntry,
            final FeedObjectActivityBuilder inSelectedObjectMapper)
    {
        SyndEntryImpl entry = inEntry;
        FeedObjectActivityBuilder selectedObjectMapper = inSelectedObjectMapper;

        Activity activity = new Activity();
        activity.setAppType(EntityType.PLUGIN);
        activity.setAppId(feed.getPlugin().getId());
        activity.setAppSource(feed.getUrl());
        final Map<String, GeneralGadgetDefinition> gadgetDefs = //
        new HashMap<String, GeneralGadgetDefinition>();
        gadgetDefs.put(feed.getPlugin().getUrl(), feed.getPlugin());
        try
        {
            List<GadgetMetaDataDTO> meta = metaDataFetcher.getGadgetsMetaData(gadgetDefs);

            if (meta.size() > 0)
            {
                activity.setAppName(meta.get(0).getTitle());
            }
        }
        catch (Exception ex)
        {
            log.error("Error getting plugin definition");
            activity.setAppName(feed.getTitle());
        }
        activity.setPostedTime(entry.getPublishedDate());
        activity.setUpdated(entry.getUpdatedDate());
        activity.setVerb(ActivityVerb.POST);
        if (selectedObjectMapper == null)
        {
            BaseObjectType type = feed.getPlugin().getObjectType();

            ActivityStreamsModule activityModule = (ActivityStreamsModule) entry.getModule(ActivityStreamsModule.URI);
            if (activityModule != null)
            {
                type = BaseObjectType.valueOf(activityModule.getObjectType());
                entry = activityModule.getAtomEntry();
            }

            if (!standardFeedMappers.containsKey(type))
            {
                type = BaseObjectType.NOTE;
            }
            selectedObjectMapper = standardFeedMappers.get(type);
        }
        selectedObjectMapper.build(feed, entry, activity);

        return activity;
    }

    /**
     * Returns the subscribers applicable to receive feed results returned for a given requestor.
     *
     * @param requestorId
     *            The requestor.
     * @param feed
     *            The feed definition.
     * @return List of subscribers.
     */
    private List<FeedSubscriber> getFeedSubscribers(final String requestorId, final Feed feed)
    {
        if (requestorId == null)
        {
            return feed.getFeedSubscribers();
        }

        for (FeedSubscriber subscriber : feed.getFeedSubscribers())
        {
            if (subscriber.getRequestor().getAccountId().equals(requestorId))
            {
                return Collections.singletonList(subscriber);
            }
        }

        return Collections.EMPTY_LIST;
    }

    /**
     * Get the update frequeuncy in minutes given the period and frequency.
     *
     * @param updatePeriod
     *            Period, hourly, daily, weekly, etc.
     * @param updateFrequency
     *            Frequency, if daily, 1 would be 1 day, if hourly, 1 would be 1 hour.
     * @return the frequency in minutes.
     */
    private long getUpdateFrequency(final String updatePeriod, final int updateFrequency)
    {
        // this can't be a switch statement due to Java lameness :(
        if (updatePeriod.equals(SyModule.HOURLY))
        {
            return updateFrequency * MINS_IN_HOUR;
        }
        else if (updatePeriod.equals(SyModule.DAILY))
        {
            return updateFrequency * MINS_IN_HOUR * HOURS_IN_DAY;
        }
        else if (updatePeriod.equals(SyModule.WEEKLY))
        {
            return updateFrequency * MINS_IN_HOUR * HOURS_IN_DAY * DAYS_IN_WEEK;
        }
        else if (updatePeriod.equals(SyModule.MONTHLY))
        {
            return updateFrequency * MINS_IN_HOUR * HOURS_IN_DAY * DAYS_IN_MONTH;
        }
        else if (updatePeriod.equals(SyModule.YEARLY))
        {
            return updateFrequency * MINS_IN_HOUR * HOURS_IN_DAY * DAYS_IN_YEAR;
        }

        // default to hourly.
        return updateFrequency * MINS_IN_HOUR;
    }
}
TOP

Related Classes of org.eurekastreams.server.action.execution.feed.RefreshFeedExecution

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.