package org.meb.spdwldr.parser;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.meb.spdwldr.EventSource;
/**
 * Extracts {@link EventSource} entries from an HTML page.
 *
 * <p>Every {@code div.event-page-card} element is inspected. A card that contains a
 * {@code div.teams} section is treated as a league event (named "home - away"); otherwise a card
 * that contains a {@code div.single} section is treated as a single event (named after its
 * anchor text). Cards for which no name or link can be extracted are skipped.
 */
public class EventSourceParserImplV1 implements EventSourceParser {

    /** Prefix prepended to every extracted href. Note that it carries no URL scheme. */
    private static final String URI_PREFIX = "www.sportowefakty.pl";

    /**
     * Matches a relative day word (Polish: yesterday, today, tomorrow, the day after tomorrow)
     * followed somewhere later by a two-digit {@code HH:MM} time. Group 1 is the day word,
     * groups 2 and 3 are the hour and minute digits.
     */
    private static final Pattern DAY_AND_TIME_PATTERN = Pattern
            .compile("(?i).*(wczoraj|dzisiaj|jutro|pojutrze).*([0-9]{2}):([0-9]{2}).*");

    /**
     * Parses the given HTML and returns the events found in it.
     *
     * @param content HTML markup to parse
     * @return the extracted events in document order, or {@code null} (not an empty list) when
     *         no event could be extracted
     */
    @Override
    public List<EventSource> parse(String content) {
        Document doc = Jsoup.parse(content);
        List<EventSource> eventSources = new ArrayList<EventSource>();

        for (Element eventDiv : doc.select("div.event-page-card")) {
            EventSource eventSource = null;
            if (!eventDiv.select("div.teams").isEmpty()) {
                eventSource = buildLeagueEvent(eventDiv);
            } else if (!eventDiv.select("div.single").isEmpty()) {
                eventSource = buildSingleEvent(eventDiv);
            }
            if (eventSource == null) {
                // Neither kind of event, or mandatory data missing: skip this card only.
                continue;
            }

            eventSource.setEventGroup(extractEventGroup(eventDiv));
            Date date = extractEventDate(eventDiv);
            if (date != null) {
                // The date field holds midnight of the event day, the time field the full instant.
                eventSource.setEventDate(DateUtils.truncate(date, Calendar.DAY_OF_MONTH));
                eventSource.setEventTime(date);
            }
            eventSources.add(eventSource);
        }

        // Callers receive null rather than an empty list when nothing was found.
        return eventSources.isEmpty() ? null : eventSources;
    }

    /**
     * Builds a league event from the home and away team anchors of the card. The event link is
     * taken from the home team anchor.
     *
     * @return the event, or {@code null} if a team name or the link is missing or blank
     */
    private static EventSource buildLeagueEvent(Element eventDiv) {
        Element homeAnchor = firstMatch(eventDiv, "div.teams div.home a");
        Element awayAnchor = firstMatch(eventDiv, "div.teams div.away a");

        String homeTeam = textOf(homeAnchor);
        String awayTeam = textOf(awayAnchor);
        String eventUri = hrefOf(homeAnchor);
        if (homeTeam == null || awayTeam == null || eventUri == null) {
            return null;
        }
        return newEventSource(homeTeam + " - " + awayTeam, eventUri);
    }

    /**
     * Builds a single (non-league) event from the first anchor inside {@code div.single}.
     *
     * @return the event, or {@code null} if there is no anchor or its text or href is blank
     */
    private static EventSource buildSingleEvent(Element eventDiv) {
        Element anchor = firstMatch(eventDiv, "div.single a");

        // jsoup returns empty strings rather than null for missing text/attributes, so the
        // helpers check for blank values instead of relying on a null comparison.
        String eventName = textOf(anchor);
        String eventUri = hrefOf(anchor);
        if (eventName == null || eventUri == null) {
            return null;
        }
        return newEventSource(eventName, eventUri);
    }

    /** Creates an event with the given name and the host-prefixed link. */
    private static EventSource newEventSource(String eventName, String href) {
        EventSource eventSource = new EventSource();
        eventSource.setEventName(eventName);
        eventSource.setEventUri(URI_PREFIX + href);
        return eventSource;
    }

    /**
     * Determines the event start from the card's {@code ul.additional li} items.
     *
     * <p>The first item whose text matches {@link #DAY_AND_TIME_PATTERN} and carries a valid
     * time of day wins. The day word is resolved relative to the current date.
     *
     * @return the event start, or {@code null} if no item yields one
     */
    private static Date extractEventDate(Element eventDiv) {
        for (Element li : eventDiv.select("ul.additional li")) {
            String text = li.text();
            if (StringUtils.isBlank(text)) {
                continue;
            }
            Matcher matcher = DAY_AND_TIME_PATTERN.matcher(text);
            if (!matcher.matches()) {
                continue;
            }

            int hours = Integer.parseInt(matcher.group(2));
            int minutes = Integer.parseInt(matcher.group(3));
            if (hours > 23 || minutes > 59) {
                // The pattern accepts any two digits; skip values that are not a valid time of
                // day instead of handing them to DateUtils.
                continue;
            }

            Date date = DateUtils.truncate(new Date(), Calendar.DAY_OF_MONTH);
            date = DateUtils.addDays(date, dayOffset(matcher.group(1)));
            date = DateUtils.setHours(date, hours);
            date = DateUtils.setMinutes(date, minutes);
            return date;
        }
        return null;
    }

    /**
     * Maps a matched day word to its offset in days from today: -1 for "wczoraj", 1 for
     * "jutro", 2 for "pojutrze" and 0 for anything else (i.e. "dzisiaj").
     */
    private static int dayOffset(String dayWord) {
        if ("wczoraj".equalsIgnoreCase(dayWord)) {
            return -1;
        }
        if ("jutro".equalsIgnoreCase(dayWord)) {
            return 1;
        }
        if ("pojutrze".equalsIgnoreCase(dayWord)) {
            return 2;
        }
        return 0;
    }

    /**
     * Finds the group (category) name of a card: the text of the first anchor inside the nearest
     * preceding sibling {@code h2} element.
     *
     * @return the trimmed group name, or {@code null} if there is no preceding {@code h2}
     *         sibling, it has no anchor, or the anchor text is blank
     */
    private static String extractEventGroup(Element eventDiv) {
        Element sibling = eventDiv.previousElementSibling();
        while (sibling != null && !"h2".equalsIgnoreCase(sibling.tagName())) {
            sibling = sibling.previousElementSibling();
        }
        if (sibling == null) {
            return null;
        }
        return textOf(firstMatch(sibling, "a"));
    }

    /** Returns the first element under {@code root} matching the selector, or {@code null}. */
    private static Element firstMatch(Element root, String selector) {
        Elements matches = root.select(selector);
        return matches.isEmpty() ? null : matches.first();
    }

    /** Returns the trimmed text of the element, or {@code null} if it is absent or blank. */
    private static String textOf(Element element) {
        return element == null ? null : trimmedOrNull(element.text());
    }

    /** Returns the trimmed href of the element, or {@code null} if it is absent or blank. */
    private static String hrefOf(Element element) {
        return element == null ? null : trimmedOrNull(element.attr("href"));
    }

    /** Returns the trimmed value, or {@code null} when the value is blank. */
    private static String trimmedOrNull(String value) {
        return StringUtils.isNotBlank(value) ? value.trim() : null;
    }
}