Package com.rometools.rome.feed.synd

Examples of com.rometools.rome.feed.synd.SyndFeed


      URL feedUrl = new URL(url);
      URLConnection openConnection = feedUrl.openConnection();
          openConnection.addRequestProperty("User-Agent", "RSS River for Elasticsearch (https://github.com/dadoonet/rssriver)");
      SyndFeedInput input = new SyndFeedInput();
            input.setPreserveWireFeed(true);
      SyndFeed feed = input.build(new XmlReader(openConnection));
      return feed;
    } catch (MalformedURLException e) {
      logger.error("RSS Url is incorrect : [{}].", url);
    } catch (IllegalArgumentException e) {
      logger.error("Feed from [{}] is incorrect.", url);
View Full Code Here


        if (closed) {
          return;
        }
       
        // Let's call the Rss flow
        SyndFeed feed = getFeed(url);
                if (feed != null) {
                    if (logger.isDebugEnabled()) logger.debug("Reading feed from {}", url);
                    Date feedDate = feed.getPublishedDate();
                    if (logger.isDebugEnabled()) logger.debug("Feed publish date is {}", feedDate);

                    String lastupdateField = "_lastupdated_" + UUID.nameUUIDFromBytes(url.getBytes()).toString();
                    Date lastDate = getLastDateFromRiver(lastupdateField);
                    // Comparing dates to see if we have something to do or not
                    if (lastDate == null || (feedDate != null && feedDate.after(lastDate))) {
                        // We have to send results to ES
                        if (logger.isTraceEnabled()) logger.trace("Feed is updated : {}", feed);

                        try {
                            // We have now to send each feed to ES
                            Date mostRecentItemDate = null;
                            for (SyndEntry message : (Iterable<SyndEntry>) feed.getEntries()) {
                                // We don't have a global date, so let's see if we have one in items
                                if (feedDate == null) {
                                    if (message.getUpdatedDate() != null) {
                                        if (lastDate == null || message.getUpdatedDate().after(lastDate)) {
                                            if (mostRecentItemDate == null || message.getUpdatedDate().after(mostRecentItemDate)) {
                                                mostRecentItemDate = message.getUpdatedDate();
                                                if (logger.isTraceEnabled()) logger.trace("No feed date. Using item updated date : {}", feedDate);
                                            }
                                        }
                                    }
                                    if (message.getPublishedDate() != null) {
                                        if (lastDate == null || message.getPublishedDate().after(lastDate)) {
                                            if (mostRecentItemDate == null || message.getPublishedDate().after(mostRecentItemDate)) {
                                                mostRecentItemDate = message.getPublishedDate();
                                                if (logger.isTraceEnabled()) logger.trace("No feed date. Using item published date : {}", feedDate);
                                            }
                                        }
                                    }
                                }

                                String description = "";
                                if (message.getDescription() != null) {
                                    description = message.getDescription().getValue();
                                }

                                // Let's define the rule for UUID generation
                                String id = UUID.nameUUIDFromBytes(description.getBytes()).toString();

                                // Let's look if object already exists
                                GetResponse oldMessage = client.prepareGet(indexName, typeName, id).execute().actionGet();
                                if (!oldMessage.isExists()) {
                                    bulkProcessor.add(indexRequest(indexName).type(typeName).id(id).source(toJson(message, riverName.getName(), feedname, raw)));

                                    if (logger.isDebugEnabled()) logger.debug("FeedMessage update detected for source [{}]", feedname != null ? feedname : "undefined");
                                    if (logger.isTraceEnabled()) logger.trace("FeedMessage is : {}", message);
                                } else {
                                    if (logger.isTraceEnabled()) logger.trace("FeedMessage {} already exist. Ignoring", id);
                                }
                            }

                            if (feedDate == null) {
                                feedDate = mostRecentItemDate;
                            }

                            if (logger.isTraceEnabled()) {
                                logger.trace("processing [_seq  ]: [{}]/[{}]/[{}], last_seq [{}]", indexName, riverName.name(), lastupdateField, feedDate);
                            }
                            // We store the lastupdate date
                            bulkProcessor.add(indexRequest("_river").type(riverName.name()).id(lastupdateField)
                                    .source(jsonBuilder().startObject().startObject("rss").field(lastupdateField, feedDate).endObject().endObject()));
                        } catch (IOException e) {
                            logger.warn("failed to add feed message entry to bulk indexing");
                        }
                    } else {
                        // Nothing new... Just relax !
                        if (logger.isDebugEnabled()) logger.debug("Nothing new in the feed... Relaxing...");
                    }

                    // #8 : Use the ttl rss field to auto adjust feed refresh rate
                    if (!ignoreTtl && feed.originalWireFeed() != null && feed.originalWireFeed() instanceof Channel) {
                        Channel channel = (Channel) feed.originalWireFeed();
                        if (channel.getTtl() > 0) {
                            int minutes = channel.getTtl();
                            if (minutes != updateRate.minutes()) {
                                updateRate = TimeValue.timeValueMinutes(minutes);
                                if (logger.isInfoEnabled())
View Full Code Here

    /**
     * Fixed JSON document describing a single feed entry (title, description, link,
     * published date, geo location and categories).
     * NOTE(review): presumably the expected value that the {@code toJson} output is compared
     * against in the tests of this class — confirm against the usages.
     */
    public static final String JSON = "{\"feedname\":null,\"title\":\"title\",\"author\":\"\",\"description\":\"desc\",\"link\":\"http://link.com/abc\",\"publishedDate\":\"2011-11-10T06:29:02.000Z\",\"source\":null,\"raw\":{},\"location\":{\"lat\":41.8947384616695,\"lon\":12.4839019775391},\"categories\":[\"worldNews\"]}";

    @Test /* this test should be moved somewhere else */
  public void shouldParseRss() throws Exception {
        SyndFeedInput input = new SyndFeedInput();
        SyndFeed feed = input.build(new XmlReader(getClass().getResource("/reuters/rss.xml")));

        assertThat(feed.getEntries().size(), greaterThan(0));
        for (Object o : feed.getEntries()) {
            SyndEntryImpl message = (SyndEntryImpl) o;
            XContentBuilder xcb = toJson(message, null, null, true);
            assertThat(xcb, notNullValue());
        }
  }
View Full Code Here

        assertThat(xContentBuilder.string(), equalTo(JSON));
    }

    private SyndEntryImpl buildEntry() throws FeedException, IOException {
        SyndFeedInput input = new SyndFeedInput();
        SyndFeed feed = input.build(new XmlReader(getClass().getResource("/reuters/rss.xml")));
        return (SyndEntryImpl) feed.getEntries().get(0);
    }
View Full Code Here

    }

    @Test
    public void shouldHaveRawContent() throws Exception {
        SyndFeedInput input = new SyndFeedInput();
        SyndFeed feed = input.build(new XmlReader(getClass().getResource("/dcrainmaker/rss.xml")));

        assertThat(feed.getEntries().size(), greaterThan(0));
        for (Object o : feed.getEntries()) {
            SyndEntryImpl message = (SyndEntryImpl) o;
            XContentBuilder xcb = toJson(message, null, null, true);
            assertThat(xcb, notNullValue());
            assertThat(xcb.string(), containsString("<p>"));
            logger.info(xcb.string());
View Full Code Here

    }

    @Test
    public void shouldNotHaveRawContent() throws Exception {
        SyndFeedInput input = new SyndFeedInput();
        SyndFeed feed = input.build(new XmlReader(getClass().getResource("/dcrainmaker/rss.xml")));

        assertThat(feed.getEntries().size(), greaterThan(0));
        for (Object o : feed.getEntries()) {
            SyndEntryImpl message = (SyndEntryImpl) o;
            XContentBuilder xcb = toJson(message, null, null, false);
            assertThat(xcb, notNullValue());
            assertThat(xcb.string(), new SubstringMatcher("<p>") {
                @Override
View Full Code Here

  @Test
  public void testLeMonde() throws Exception {
    SyndFeedInput input = new SyndFeedInput();
        input.setPreserveWireFeed(true);
    SyndFeed feed = input.build(new XmlReader(getClass().getResource("/lemonde/rss.xml")));

        assertThat(feed, notNullValue());
        assertThat(feed.getEntries().isEmpty(), equalTo(false));

        assertThat(feed.originalWireFeed(), notNullValue());
        assertThat(feed.originalWireFeed(), instanceOf(Channel.class));

        Channel channel = (Channel) feed.originalWireFeed();
        assertThat(channel.getTtl(), equalTo(15));
  }
View Full Code Here

    @Test
    public void testGeoLoc() throws Exception {
        SyndFeedInput input = new SyndFeedInput();
        input.setPreserveWireFeed(true);
        SyndFeed feed = input.build(new XmlReader(getClass().getResource("/reuters/rss.xml")));

        assertThat(feed, notNullValue());
        assertThat(feed.getEntries().isEmpty(), equalTo(false));

        for (Object o : feed.getEntries()) {
            assertThat(o, instanceOf(SyndEntryImpl.class));
            SyndEntryImpl entry = (SyndEntryImpl) o;

            GeoRSSModule geoRSSModule = GeoRSSUtils.getGeoRSS(entry);
            assertThat(geoRSSModule, notNullValue());
View Full Code Here

    if (response.getStatus() != Status.OK.getStatusCode()) {
      return response;
    }
    Entries entries = (Entries) response.getEntity();

    SyndFeed feed = new SyndFeedImpl();
    feed.setFeedType("rss_2.0");
    feed.setTitle("CommaFeed - " + entries.getName());
    feed.setDescription("CommaFeed - " + entries.getName());
    String publicUrl = config.getApplicationSettings().getPublicUrl();
    feed.setLink(publicUrl);

    List<SyndEntry> children = Lists.newArrayList();
    for (Entry entry : entries.getEntries()) {
      children.add(entry.asRss());
    }
    feed.setEntries(children);

    SyndFeedOutput output = new SyndFeedOutput();
    StringWriter writer = new StringWriter();
    try {
      output.output(feed, writer);
View Full Code Here

    if (response.getStatus() != Status.OK.getStatusCode()) {
      return response;
    }
    Entries entries = (Entries) response.getEntity();

    SyndFeed feed = new SyndFeedImpl();
    feed.setFeedType("rss_2.0");
    feed.setTitle("CommaFeed - " + entries.getName());
    feed.setDescription("CommaFeed - " + entries.getName());
    String publicUrl = config.getApplicationSettings().getPublicUrl();
    feed.setLink(publicUrl);

    List<SyndEntry> children = Lists.newArrayList();
    for (Entry entry : entries.getEntries()) {
      children.add(entry.asRss());
    }
    feed.setEntries(children);

    SyndFeedOutput output = new SyndFeedOutput();
    StringWriter writer = new StringWriter();
    try {
      output.output(feed, writer);
View Full Code Here

TOP

Related Classes of com.rometools.rome.feed.synd.SyndFeed

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.