Package org.meb.spdwldr.parser

Source Code of org.meb.spdwldr.parser.EventSourceParserImplV1

package org.meb.spdwldr.parser;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.meb.spdwldr.EventSource;

public class EventSourceParserImplV1 implements EventSourceParser {

  private static final Pattern pattern = Pattern
      .compile("(?i).*(wczoraj|dzisiaj|jutro|pojutrze).*([0-9]{2}):([0-9]{2}).*");

  @Override
  public List<EventSource> parse(String content) {
    Document doc = Jsoup.parse(content);
    Elements eventDivs = doc.select("div.event-page-card");
    ListIterator<Element> eventsDivsIter = eventDivs.listIterator();
    List<EventSource> eventSources = new ArrayList<EventSource>();

    while (eventsDivsIter.hasNext()) {
      EventSource eventSource = null;

      Element eventDiv = eventsDivsIter.next();
      if (eventDiv.select("div.teams").size() != 0) {
        // league event
        String homeTeam = extractTeam(eventDiv, "div.teams div.home a");
        String awayTeam = extractTeam(eventDiv, "div.teams div.away a");
        String eventUri = extractTeamEventUri(eventDiv);
        if (homeTeam != null && awayTeam != null && eventUri != null) {
          eventSource = new EventSource();
          eventSource.setEventName(homeTeam + " - " + awayTeam);
          eventSource.setEventUri("www.sportowefakty.pl" + eventUri);
        }
      } else if (eventDiv.select("div.single").size() != 0) {
        // single event
        Element a = eventDiv.select("div.single a").get(0);
        String eventName = a.text();
        String eventUri = a.attr("href");
        if (eventName != null && eventUri != null) {
          eventSource = new EventSource();
          eventSource.setEventName(eventName);
          eventSource.setEventUri("www.sportowefakty.pl" + eventUri);
        }
      }

      if (eventSource != null) {
        String eventGroup = extractEventGroup(eventDiv);
        eventSource.setEventGroup(eventGroup);
        Date date = extractEventDate(eventDiv);
        if (date != null) {
          eventSource.setEventDate(DateUtils.truncate(date, Calendar.DAY_OF_MONTH));
          eventSource.setEventTime(date);
        }
        eventSources.add(eventSource);
      }
    }
    return eventSources.size() != 0 ? eventSources : null;
  }

  private Date extractEventDate(Element eventDiv) {
    Elements lis = eventDiv.select("ul.additional li");
    Iterator<Element> liIter = lis.iterator();
    while (liIter.hasNext()) {
      Element li = liIter.next();
      String text = li.text();
      if (StringUtils.isNotBlank(text)) {

        Matcher matcher = pattern.matcher(text);
        if (matcher.matches()) {

          String dayString = matcher.group(1).toLowerCase();
          int offset;
          if (dayString.equals("wczoraj")) {
            offset = -1;
          } else if (dayString.equals("jutro")) {
            offset = 1;
          } else if (dayString.equals("pojutrze")) {
            offset = 2;
          } else {
            offset = 0;
          }

          Date date = DateUtils.truncate(new Date(), Calendar.DAY_OF_MONTH);
          date = DateUtils.addDays(date, offset);
          date = DateUtils.setHours(date, Integer.parseInt(matcher.group(2)));
          date = DateUtils.setMinutes(date, Integer.parseInt(matcher.group(3)));
          return date;
        }
      }
    }
    return null;
  }

  private String extractTeam(Element eventDiv, String select) {
    try {
      Element teamA = eventDiv.select(select).get(0);
      String text = teamA.text();
      if (StringUtils.isNotBlank(text)) {
        return text.trim();
      } else {
        return null;
      }
    } catch (Exception e) {
      return null;
    }
  }

  private String extractTeamEventUri(Element eventDiv) {
    try {
      Element teamA = eventDiv.select("div.teams div.home a").get(0);
      String href = teamA.attr("href");
      if (StringUtils.isNotBlank(href)) {
        return href.trim();
      } else {
        return null;
      }
    } catch (Exception e) {
      return null;
    }
  }

  private String extractEventGroup(Element eventDiv) {
    try {
      Element catH2 = eventDiv.previousElementSibling();
      while (!catH2.tagName().toLowerCase().equals("h2")) {
        catH2 = catH2.previousElementSibling();
      }
      Element catA = catH2.select("a").get(0);
      String text = catA.text();
      if (StringUtils.isNotBlank(text)) {
        return text.trim();
      } else {
        return null;
      }
    } catch (Exception e) {
      return null;
    }
  }
}
TOP

Related Classes of org.meb.spdwldr.parser.EventSourceParserImplV1

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Corporation. Contact coftware#gmail.com.