Package com.salas.bb.utils.parser

Source Code of com.salas.bb.utils.parser.RomeFeedParser$RedirectionRecorder

// BlogBridge -- RSS feed reader, manager, and web based service
// Copyright (C) 2002-2006 by R. Pito Salas
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software Foundation;
// either version 2 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along with this program;
// if not, write to the Free Software Foundation, Inc., 59 Temple Place,
// Suite 330, Boston, MA 02111-1307 USA
//
// Contact: R. Pito Salas
// mailto:pitosalas@users.sourceforge.net
// More information: about BlogBridge
// http://www.blogbridge.com
// http://sourceforge.net/projects/blogbridge
//
// $Id: RomeFeedParser.java,v 1.25 2008/06/26 13:41:57 spyromus Exp $
//

package com.salas.bb.utils.parser;

import com.salas.bb.networking.manager.NetManager;
import com.salas.bb.utils.Constants;
import com.salas.bb.utils.StringUtils;
import com.salas.bb.utils.i18n.Strings;
import com.salas.bb.utils.net.IPermanentRedirectionListener;
import com.salas.bb.utils.net.URLInputStream;
import com.salas.bb.utils.parser.impl.BBSyndFeedInput;
import com.salas.bb.utils.xml.XmlReaderFactory;
import com.sun.syndication.feed.module.DCModule;
import com.sun.syndication.feed.module.DCSubject;
import com.sun.syndication.feed.module.Module;
import com.sun.syndication.feed.module.SyModule;
import com.sun.syndication.feed.synd.*;
import com.sun.syndication.io.FeedException;
import com.sun.syndication.io.SyndFeedInput;
import com.totsp.xml.syndication.content.ContentModule;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
* Gateway to Rome parser.
*/
public class RomeFeedParser implements IFeedParser
{
    private static final List CONTENT_TYPE_PREFERENCE;
    private static final String TYPE_HTML = "html";
    private static final String TYPE_TEXT_HTML = "text/html";

    static
    {
        CONTENT_TYPE_PREFERENCE = Arrays.asList(TYPE_TEXT_HTML, TYPE_HTML, "text/plain", "text", "text/xhtml", "xhtml");
    }

    /**
     * Parses the resource by the given URL and returns the objects.
     *
     * @param xmlURL         XML URL of the resource.
     * @param title          feed title (if known).
     * @param lastUpdateTime time of last update (server time-zone) or (-1) if not known.
     *
     * @return result.
     *
     * @throws FeedParserException
     *                              in case of any problems with parsing.
     * @throws NullPointerException if the URL is NULL.
     * @throws java.io.IOException  if there's a problem with reading feed.
     */
    public FeedParserResult parse(URL xmlURL, String title, long lastUpdateTime)
        throws FeedParserException, IOException
    {
        if (xmlURL == null) throw new NullPointerException(Strings.error("unspecified.url"));

        FeedParserResult result = new FeedParserResult();

        String xmlURLS  = xmlURL.toString();
        String username = null;
        String password = null;
        Pattern pattern = Pattern.compile("^(https?://)([^:]+):([^@]+)@(.+)$", Pattern.CASE_INSENSITIVE);
        Matcher matcher = pattern.matcher(xmlURLS);
        if (matcher.find())
        {
            username = matcher.group(2);
            password = matcher.group(3);
            xmlURL = new URL(matcher.group(1) + matcher.group(4));
        }

        // Create stream for reading the feed and register it
        URLInputStream stream = new URLInputStream(xmlURL, lastUpdateTime);
        stream.setBasicAuthenticationInfo(username, password);
        if (title == null) title = xmlURL.toString();
        NetManager.register(NetManager.TYPE_POLLING, title, title, stream);
        stream.setRedirectionListener(new RomeFeedParser.RedirectionRecorder(result));

        stream.connect();
        try
        {
            long lastModifiedTime = stream.getLastModifiedTime();
            if (lastModifiedTime == -1) lastModifiedTime = stream.getServerTime();

            if (stream.getResponseCode() != HttpURLConnection.HTTP_NOT_MODIFIED)
            {
                result = parse(stream, result, xmlURL);
            }

            Channel channel = result.getChannel();
            if (channel != null) channel.setLastUpdateServerTime(lastModifiedTime);
        } finally
        {
            stream.close();
        }

        return result;
    }


    /**
     * Parses the resource presented by a stream and returns the objects.
     *
     * @param stream    XML stream.
     * @param rootURL   URL for the relative links resolution.
     *
     * @return result.
     *
     * @throws FeedParserException  in case of any problems with parsing.
     * @throws NullPointerException if the URL is NULL.
     * @throws java.io.IOException  if there's a problem with reading feed.
     */
    public FeedParserResult parse(InputStream stream, URL rootURL)
        throws IOException, FeedParserException
    {
        return parse(stream, new FeedParserResult(), rootURL);
    }

    /**
     * Parses the resource by the given stream.
     *
     * @param aStream   stream to parse as feed.
     * @param aResult   object with result to fill.
     * @param aFeedURL  root URL of a feed for the relative links resolution.
     *
     * @return result.
     *
     * @throws FeedParserException  in case of any problems with parsing.
     * @throws IOException          if there's a problem with reading feed.
     */
    protected FeedParserResult parse(InputStream aStream, FeedParserResult aResult, URL aFeedURL)
        throws IOException, FeedParserException
    {
        try
        {
            SyndFeedInput input = new BBSyndFeedInput();
            SyndFeed feed = input.build(XmlReaderFactory.create(aStream));

            Channel channel = RomeFeedParser.convertFeed(feed, aFeedURL);
            aResult.setChannel(channel);

            // Add items
            for (SyndEntry item : (List<SyndEntry>)feed.getEntries())
            {
                channel.addItem(RomeFeedParser.convertItem(item, aFeedURL));
            }
        } catch (FeedException e)
        {
            throw new FeedParserException(Strings.error("failed.to.parse.the.feed"), e);
        }

        return aResult;
    }

    /**
     * Converts feed object into internal format.
     *
     * @param aFeed     source feed object.
     * @param aFeedURL  root URL of a feed for the relative links resolution.
     *
     * @return internal object.
     *
     * @throws MalformedURLException if URL is not valid.
     */
    private static Channel convertFeed(SyndFeed aFeed, URL aFeedURL)
        throws MalformedURLException
    {
        Channel channel = new Channel();
        channel.setAuthor(aFeed.getAuthor());
        channel.setDescription(aFeed.getDescription());
        channel.setFormat(aFeed.getFeedType());
        channel.setLanguage(aFeed.getLanguage());
        channel.setSiteURL(StringUtils.isEmpty(aFeed.getLink()) ? null
            : new URL(aFeedURL, StringUtils.fixURL(aFeed.getLink())));
        channel.setTitle(aFeed.getTitle());

        long period = getUpdatePeriod(aFeed);
        if (period != -1)
        {
            int updateFrequency = getUpdateFrequency(aFeed);
            if (updateFrequency > 1) period = period / updateFrequency;
        }

        channel.setUpdatePeriod(period);
        return channel;
    }

    /**
     * Returns update frequency of the feed in times.
     *
     * @param aFeed feed.
     *
     * @return frequency.
     */
    private static int getUpdateFrequency(SyndFeed aFeed)
    {
        SyModule module = (SyModule)aFeed.getModule(SyModule.URI);

        return module == null ? -1 : module.getUpdateFrequency();
    }

    /**
     * Returns update period in milliseconds.
     *
     * @param aFeed feed.
     *
     * @return period in ms or <code>-1</code> if not specified.
     */
    private static long getUpdatePeriod(SyndFeed aFeed)
    {
        SyModule module = (SyModule)aFeed.getModule(SyModule.URI);

        return module == null ? -1 : periodToValue(module.getUpdatePeriod());
    }

        /**
     * Converts the name of period to corresponding value.
     *
     * @param periodName period name.
     *
     * @return value in ms or -1 if period name isn't known or NULL.
     */
    private static long periodToValue(String periodName)
    {
        long period = -1;

        if (SyModule.YEARLY.equalsIgnoreCase(periodName))
        {
            period = Constants.MILLIS_IN_YEAR;
        } else if (SyModule.MONTHLY.equalsIgnoreCase(periodName))
        {
            period = Constants.MILLIS_IN_MONTH;
        } else if (SyModule.WEEKLY.equalsIgnoreCase(periodName))
        {
            period = Constants.MILLIS_IN_WEEK;
        } else if (SyModule.HOURLY.equalsIgnoreCase(periodName))
        {
            period = Constants.MILLIS_IN_HOUR;
        } else if (SyModule.DAILY.equalsIgnoreCase(periodName))
        {
            period = Constants.MILLIS_IN_DAY;
        }

        return period;
    }

    /**
     * Converts item object into internal item format.
     *
     * @param anEntry   source item object.
     * @param aFeedURL  root URL of a feed for the relative links resolution.
     *
     * @return internal object.
     */
    private static Item convertItem(SyndEntry anEntry, URL aFeedURL)
    {
        String text = getEntryText(anEntry);
        String title = anEntry.getTitle();
        if (title != null && title.equals("<No Title>")) title = null;

        // Append enclosure to the end of the article
        List enclosures = anEntry.getEnclosures();
        if (enclosures != null && enclosures.size() > 0)
        {
            for (Object en : enclosures)
            {
                SyndEnclosure enclosure = (SyndEnclosure)en;
                String location = enclosure.getUrl();
                if (location != null)
                {
                    long length = enclosure.getLength();
                    text += formatEnclosure(location, length);
                }
            }
        } else
        {
            // Scan links list for possible enclosures.
            // Note: We do this in "else" block because the method is
            // not very reliable and if there are explicit enclosures
            // mention, we'd better not do this.

            List links = anEntry.getLinks();
            if (links != null) for (Object lnk : links)
            {
                SyndLink link = (SyndLink)lnk;

                String rel = link.getRel();
                long length = link.getLength();
                String location = link.getHref();

                if (length > 0 &&
                    (StringUtils.isEmpty(rel) || "enclosure".equalsIgnoreCase(rel)) &&
                    StringUtils.isNotEmpty(location))
                {
                    text += formatEnclosure(location, length);
                }
            }
        }

        Item item = new Item(text);
        item.setAuthor(anEntry.getAuthor());

        URL itemLink;
        try
        {
            String link = anEntry.getLink();
            itemLink = link == null ? null : new URL(aFeedURL, link);
        } catch (MalformedURLException e)
        {
            itemLink = null;
        }
        item.setLink(itemLink);
        item.setPublicationDate(anEntry.getPublishedDate());
        if (item.getPublicationDate() == null) item.setPublicationDate(anEntry.getUpdatedDate());
        item.setTitle(title);

        // URI
        item.setUri(anEntry.getUri());

        // Use subject or categories as subject
        String subject = null;
        List<SyndCategory> categories = (List<SyndCategory>)anEntry.getCategories();
        if (categories != null && !categories.isEmpty())
        {
            List<String> catsStr = new ArrayList<String>();
            for (SyndCategory category : categories)
            {
                String name = category.getName();
                if (StringUtils.isNotEmpty(name)) catsStr.add(name);
            }

            subject = StringUtils.join(catsStr.iterator(), " ");
        } else
        {
            DCModule dc = (DCModule)anEntry.getModule(DCModule.URI);
            if (dc != null)
            {
                DCSubject dcSubject = dc.getSubject();
                if (dcSubject != null) subject = dcSubject.getValue();
            }
        }
        item.setSubject(subject);

        return item;
    }

    /**
     * Formats an enclosure URL and length for inclusion in the article text.
     *
     * @param location  location.
     * @param length    length in bytes.
     *
     * @return string.
     */
    public static String formatEnclosure(String location, long length)
    {
        String[] linkComponents = location.split("/");
        String filename = linkComponents[linkComponents.length - 1];

        return "<p id=\"bbenclosure\">" +
            "<b>" + Strings.message("feed.parser.enclosure") + "</b> <a href='" + location + "'>" +
            filename + "</a>" + (length > 0 ? " (" + StringUtils.sizeToString(length) + ")" : "") +
            "</p>";
    }

    /**
     * Returns the text of an entry.
     *
     * @param anEntry entry.
     *
     * @return text.
     */
    private static String getEntryText(SyndEntry anEntry)
    {
        String text = null;

        // Check if the RSS/RDF content module is present
        Module module = anEntry.getModule(ContentModule.URI);
        if (module != null)
        {
            ContentModule cmod = (ContentModule)module;
            List encodeds = cmod.getEncodeds();
            if (encodeds != null && encodeds.size() > 0)
            {
                text = (String)encodeds.get(0);
            }
        }

        // If there was no content module, check various content types (Atom)
        if (text == null)
        {
            int type = Integer.MAX_VALUE;
            SyndContent content = null;

            // Select the best content of all available
            List<SyndContent> contents = (List<SyndContent>)anEntry.getContents();
            if (contents != null)
            {
                for (SyndContent cont : contents)
                {
                    int contType = getContentType(cont.getType());
                    if (contType < type)
                    {
                        type = contType;
                        content = cont;
                    }
                }
            }

            if (content == null) content = anEntry.getDescription();

            if (content != null)
            {
                String value = content.getValue();

                // For some mysterious reason Rome doesn't unescape the HTML and
                // Text/HTML content. Do so if necessary.
// Commented out as it seems Rome 0.9 started to unescape feeds.
//                if (TYPE_HTML.equals(content.getType()) ||
//                    TYPE_TEXT_HTML.equals(content.getType()))
//                {
//                    value = StringUtils.quickUnescape(value);
//                }

                text = value;
            }
        }

        // Check DC module
        if (StringUtils.isEmpty(text))
        {
            DCModule dcModule = (DCModule)anEntry.getModule(DCModule.URI);
            if (dcModule != null) text = dcModule.getDescription();
        }

        if (StringUtils.isEmpty(text)) text = Strings.message("feed.parser.no.text");

        return text;
    }

    /**
     * Returns content type preference order.
     *
     * @param contentType type.
     *
     * @return order (the lower, the more preferred).
     */
    private static int getContentType(String contentType)
    {
        return contentType == null ? -1
            : CONTENT_TYPE_PREFERENCE.indexOf(contentType.toLowerCase());
    }


    /**
     * Listener of permanent redirections notifications. Once the notification comes
     * the listener records new URL in the associated result object.
     */
    private static class RedirectionRecorder implements IPermanentRedirectionListener
    {
        private FeedParserResult result;

        /**
         * Creates redirection recorder for a given result object.
         *
         * @param aResult result object.
         */
        public RedirectionRecorder(FeedParserResult aResult)
        {
            result = aResult;
        }

        /**
         * Invoked when redirection detected.
         *
         * @param newLocation new location.
         */
        public void redirectedTo(URL newLocation)
        {
            result.setRedirectionURL(newLocation);
        }
    }
}
TOP

Related Classes of com.salas.bb.utils.parser.RomeFeedParser$RedirectionRecorder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.