package name.abuchen.portfolio.online.impl;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.DateFormatSymbols;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.MessageFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Pattern;
import name.abuchen.portfolio.Messages;
import name.abuchen.portfolio.model.Exchange;
import name.abuchen.portfolio.model.LatestSecurityPrice;
import name.abuchen.portfolio.model.Security;
import name.abuchen.portfolio.model.SecurityPrice;
import name.abuchen.portfolio.model.Values;
import name.abuchen.portfolio.online.QuoteFeed;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class HTMLTableQuoteFeed implements QuoteFeed
{
/**
 * Base class of a table column this feed is able to interpret. A column is
 * recognized by regular expressions which must match the complete text of a
 * header cell.
 */
private abstract static class Column
{
    /**
     * Per-thread number format used to parse quotes; it is configured with
     * the decimal format symbols of {@link Locale#GERMAN}.
     */
    static final ThreadLocal<DecimalFormat> DECIMAL_FORMAT = new ThreadLocal<DecimalFormat>()
    {
        @Override
        protected DecimalFormat initialValue()
        {
            DecimalFormatSymbols germanSymbols = new DecimalFormatSymbols(Locale.GERMAN);
            return new DecimalFormat("#,##0.###", germanSymbols); //$NON-NLS-1$
        }
    };

    /** Compiled header patterns, in the order given to the constructor. */
    private final Pattern[] patterns;

    /**
     * @param strings
     *            regular expressions identifying the header of this column
     */
    protected Column(String[] strings)
    {
        Pattern[] compiled = new Pattern[strings.length];
        int position = 0;
        for (String expression : strings)
            compiled[position++] = Pattern.compile(expression);
        this.patterns = compiled;
    }

    /**
     * Checks whether the text of the given header cell is matched entirely
     * by at least one of the patterns of this column.
     */
    protected boolean matches(Element header)
    {
        String headerText = header.text();
        boolean found = false;
        for (int ii = 0; !found && ii < patterns.length; ii++)
            found = patterns[ii].matcher(headerText).matches();
        return found;
    }

    /**
     * Reads the content of a table cell and stores it in the given price.
     *
     * @throws ParseException
     *             if the cell text cannot be parsed
     */
    abstract void read(Element value, LatestSecurityPrice price) throws ParseException;

    /**
     * Parses the cell text as a decimal number and returns it multiplied by
     * 100 and rounded to the nearest long.
     *
     * @throws ParseException
     *             if the cell text is not a number
     */
    protected long asQuote(Element value) throws ParseException
    {
        Number parsed = DECIMAL_FORMAT.get().parse(value.text());
        return Math.round(parsed.doubleValue() * 100);
    }
}
/**
 * Column holding the date of a quote; it is identified by the header
 * "Datum".
 * <p>
 * The cell text is parsed by trying the following formats in order:
 * <ol>
 * <li>{@code dd.MM.yy}</li>
 * <li>{@code dd. MMM yyyy} with the month names of the default locale</li>
 * <li>{@code dd. MMM yyyy} with the alternative abbreviations (e.g. "mär")</li>
 * <li>{@code dd. MMM yyyy} with explicitly spelled German abbreviations</li>
 * </ol>
 */
private static class DateColumn extends Column
{
    private DateFormatSymbols alternativeSymbols = null;
    private DateFormatSymbols germanSymbols = null;

    @SuppressWarnings("nls")
    public DateColumn()
    {
        super(new String[] { "Datum" });

        // some sites return "mär" instead of the default "mrz"
        alternativeSymbols = new DateFormatSymbols(Locale.GERMANY);
        alternativeSymbols.setShortMonths(new String[] { "jan", "feb", "mär", "apr", "may", "jun", "jul", "aug",
                        "sep", "oct", "nov", "dec" });

        // German abbreviations spelled out explicitly: the plain
        // "dd. MMM yyyy" format uses the platform's default locale, hence
        // "Okt" or "Dez" could not be parsed if that locale is not German
        germanSymbols = new DateFormatSymbols(Locale.GERMANY);
        germanSymbols.setShortMonths(new String[] { "jan", "feb", "mrz", "apr", "mai", "jun", "jul", "aug",
                        "sep", "okt", "nov", "dez" });
    }

    /**
     * Parses the cell text as date and sets it as time of the price.
     *
     * @throws ParseException
     *             if none of the supported formats matches the text
     */
    @Override
    void read(Element value, LatestSecurityPrice price) throws ParseException
    {
        String text = value.text();

        // formats are created on demand; SimpleDateFormat is not thread-safe
        // and therefore not shared
        Date date = tryParse(new SimpleDateFormat("dd.MM.yy"), text); //$NON-NLS-1$
        if (date == null)
            date = tryParse(new SimpleDateFormat("dd. MMM yyyy"), text); //$NON-NLS-1$
        if (date == null)
            date = tryParse(new SimpleDateFormat("dd. MMM yyyy", alternativeSymbols), text); //$NON-NLS-1$
        if (date == null)
        {
            // last attempt: let the ParseException propagate to the caller
            date = new SimpleDateFormat("dd. MMM yyyy", germanSymbols).parse(text); //$NON-NLS-1$
        }

        price.setTime(date);
    }

    /**
     * Parses the text with the given format; returns {@code null} instead of
     * throwing if the text does not match.
     */
    private static Date tryParse(SimpleDateFormat format, String text)
    {
        try
        {
            return format.parse(text);
        }
        catch (ParseException ignored)
        {
            // not an error: the caller falls back to the next format
            return null;
        }
    }
}
/**
 * Column holding the closing quote; the parsed quote is stored as value of
 * the price.
 */
private static class CloseColumn extends Column
{
    @SuppressWarnings("nls")
    public CloseColumn()
    {
        super(new String[] { //
                        "Schluss.*", //
                        "Schluß.*", //
                        "Rücknahmepreis.*", //
                        "Close.*" });
    }

    @Override
    void read(Element value, LatestSecurityPrice price) throws ParseException
    {
        long close = asQuote(value);
        price.setValue(close);
    }
}
/**
 * Column holding the highest quote; the parsed quote is stored as high of
 * the price.
 */
private static class HighColumn extends Column
{
    @SuppressWarnings("nls")
    public HighColumn()
    {
        super(new String[] { //
                        "Hoch.*", //
                        "Tageshoch.*", //
                        "Max.*", //
                        "High.*" });
    }

    @Override
    void read(Element value, LatestSecurityPrice price) throws ParseException
    {
        long high = asQuote(value);
        price.setHigh(high);
    }
}
/**
 * Column holding the lowest quote; the parsed quote is stored as low of the
 * price.
 */
private static class LowColumn extends Column
{
    @SuppressWarnings("nls")
    public LowColumn()
    {
        super(new String[] { //
                        "Tief.*", //
                        "Tagestief.*", //
                        "Low.*" });
    }

    @Override
    void read(Element value, LatestSecurityPrice price) throws ParseException
    {
        long low = asQuote(value);
        price.setLow(low);
    }
}
/**
 * Pairs a recognized column with the index of the cell in which its header
 * was found.
 */
private static class Spec
{
    /** the column that matched the header cell */
    private final Column column;

    /** zero-based index of the matching cell within its row */
    private final int index;

    public Spec(Column column, int index)
    {
        this.index = index;
        this.column = column;
    }
}
/** All columns this feed tries to identify in the header of a table. */
private final Column[] columns = new Column[] { //
                new DateColumn(), //
                new CloseColumn(), //
                new HighColumn(), //
                new LowColumn() };
/**
 * Returns the identifier of this feed.
 *
 * @return the constant {@code "GENERIC_HTML_TABLE"}
 */
@Override
public String getId()
{
return "GENERIC_HTML_TABLE"; //$NON-NLS-1$
}
/**
 * Returns the name of this feed.
 *
 * @return the text of {@code Messages.LabelHTMLTable}
 */
@Override
public String getName()
{
return Messages.LabelHTMLTable;
}
/**
 * Loads the quotes of every given security and sets the most recent one as
 * its latest quote. The previous close is taken from the second most recent
 * quote or, if only one quote is available, from the latest quote itself.
 *
 * @param securities
 *            securities to update
 * @param errors
 *            collects problems that occur while loading the quotes
 * @return true if the latest quote of at least one security was updated
 */
@Override
public boolean updateLatestQuotes(List<Security> securities, List<Exception> errors)
{
    boolean anyChanged = false;

    for (Security security : securities)
    {
        List<LatestSecurityPrice> quotes = getHistoricalQuotes(security, null, errors);
        if (quotes.isEmpty())
            continue;

        Collections.sort(quotes);

        int lastIndex = quotes.size() - 1;
        LatestSecurityPrice latest = quotes.get(lastIndex);

        // with a single quote, the latest quote serves as its own reference
        LatestSecurityPrice reference = lastIndex > 0 ? quotes.get(lastIndex - 1) : latest;
        latest.setPreviousClose(reference.getValue());

        if (security.setLatest(latest))
            anyChanged = true;
    }

    return anyChanged;
}
/**
 * Loads the quotes of the given security and adds each of them (time and
 * value) as price to the security.
 *
 * @param security
 *            security to update
 * @param errors
 *            collects problems that occur while loading the quotes
 * @return true if at least one call to {@code addPrice} returned true
 */
@Override
public boolean updateHistoricalQuotes(Security security, List<Exception> errors)
{
    boolean anyAdded = false;

    for (LatestSecurityPrice quote : getHistoricalQuotes(security, null, errors))
    {
        SecurityPrice price = new SecurityPrice(quote.getTime(), quote.getValue());
        if (security.addPrice(price))
            anyAdded = true;
    }

    return anyAdded;
}
/**
 * Loads the quotes from the feed URL of the given security. If the security
 * has no feed URL, an error is recorded and an empty list is returned.
 *
 * @param security
 *            security providing the feed URL
 * @param start
 *            not used by this implementation
 * @param errors
 *            collects problems that occur while loading the quotes
 * @return the quotes extracted from the page
 */
@Override
public List<LatestSecurityPrice> getHistoricalQuotes(Security security, Date start, List<Exception> errors)
{
    String feedURL = security.getFeedURL();
    boolean hasFeedURL = feedURL != null && feedURL.length() > 0;

    if (hasFeedURL)
        return parseFromURL(feedURL, errors);

    errors.add(new IOException(MessageFormat.format(Messages.MsgMissingFeedURL, security.getName())));
    return Collections.emptyList();
}
/**
 * Extracts quotes from the given text by handing it to
 * {@link #parseFromHTML(String, List)}.
 *
 * @param response
 *            text which is parsed as HTML
 * @param errors
 *            collects problems that occur while parsing
 * @return the result of {@link #parseFromHTML(String, List)}
 */
@Override
public List<LatestSecurityPrice> getHistoricalQuotes(String response, List<Exception> errors)
{
return parseFromHTML(response, errors);
}
/**
 * This implementation does not provide exchanges: both parameters are
 * ignored.
 *
 * @return always {@code null}
 */
@Override
public List<Exchange> getExchanges(Security subject, List<Exception> errors)
{
return null;
}
/**
 * Downloads the page at the given URL and extracts the quotes from it. The
 * request is sent with a browser-like user agent which is chosen based on
 * the system property {@code os.name}.
 *
 * @param url
 *            address of the page; converted to its ASCII form before the
 *            request is made
 * @param errors
 *            collects problems; a malformed URL or a failed download is
 *            recorded here
 * @return the extracted quotes; an empty list if the URL is malformed or the
 *         download fails
 */
@SuppressWarnings("nls")
protected List<LatestSecurityPrice> parseFromURL(String url, List<Exception> errors)
{
    // without a user agent, some sites serve a mobile/alternative version
    String userAgent = null;

    // lower-case with a fixed locale: with the platform default, a Turkish
    // locale maps 'I' to a dotless i and the "windows" prefix test fails
    String os = System.getProperty("os.name", "unknown").toLowerCase(Locale.ENGLISH);
    if (os.startsWith("windows"))
        userAgent = "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36";
    else if (os.startsWith("mac"))
        userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11";
    else
        userAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0";

    try
    {
        String escapedUrl = new URI(url).toASCIIString();
        return parse(Jsoup.connect(escapedUrl).userAgent(userAgent).get(), errors);
    }
    catch (URISyntaxException e)
    {
        errors.add(e);
        return Collections.emptyList();
    }
    catch (IOException e)
    {
        errors.add(e);
        return Collections.emptyList();
    }
}
/**
 * Parses the given HTML text with Jsoup and extracts the quotes from the
 * resulting document.
 *
 * @param html
 *            HTML text to parse
 * @param errors
 *            collects problems that occur while extracting the quotes
 * @return the extracted quotes
 */
protected List<LatestSecurityPrice> parseFromHTML(String html, List<Exception> errors)
{
return parse(Jsoup.parse(html), errors);
}
/**
 * Extracts quotes from the first table of the document whose header
 * contains at least a date and a close column. Rows that cannot be read are
 * recorded as errors and skipped. If no quote could be extracted at all, the
 * HTML of the document is recorded as error for further analysis.
 *
 * @param document
 *            parsed HTML document
 * @param errors
 *            collects problems that occur while reading the rows
 * @return the extracted quotes; possibly empty
 */
private List<LatestSecurityPrice> parse(Document document, List<Exception> errors)
{
    List<LatestSecurityPrice> result = new ArrayList<LatestSecurityPrice>();

    for (Element table : document.getElementsByTag("table")) //$NON-NLS-1$
    {
        List<Spec> specs = new ArrayList<Spec>();
        int firstDataRow = buildSpecFromTable(table, specs);

        // tables without date and close columns are of no interest
        if (!isSpecValid(specs))
            continue;

        Elements rows = table.select("> tbody > tr"); //$NON-NLS-1$
        int index = firstDataRow;
        while (index < rows.size())
        {
            Element row = rows.get(index++);
            try
            {
                LatestSecurityPrice price = extractPrice(row, specs);
                if (price != null)
                    result.add(price);
            }
            catch (Exception e)
            {
                // a broken row must not prevent reading the remaining rows
                errors.add(e);
            }
        }

        // only the first matching table is read
        break;
    }

    // if no quotes could be extracted, log HTML for further analysis
    if (result.isEmpty())
        errors.add(new IOException(document.html()));

    return result;
}
/**
 * Identifies the columns of the given table. Header cells ({@code th}) are
 * looked up in {@code thead} first and in {@code tbody} second; if neither
 * exists, the cells of the first and - if the first yields no column - of
 * the second regular row are used.
 *
 * @param table
 *            table element to inspect
 * @param specs
 *            receives the identified columns
 * @return index of the first {@code tbody} row to read quotes from: 0 if
 *         {@code th} cells were found, otherwise the number of regular rows
 *         inspected as header
 */
@SuppressWarnings("nls")
private int buildSpecFromTable(Element table, List<Spec> specs)
{
    // header cells either live in thead or in the body itself
    String[] headerSelectors = { "> thead > tr > th", "> tbody > tr > th" };
    for (String selector : headerSelectors)
    {
        Elements headerCells = table.select(selector);
        if (!headerCells.isEmpty())
        {
            buildSpecFromRow(headerCells, specs);
            return 0;
        }
    }

    // no th elements at all: check the first two regular rows
    Elements rows = table.select("> tbody > tr");
    int consumed = 0;

    if (!rows.isEmpty())
    {
        buildSpecFromRow(rows.get(0).select("> td"), specs);
        consumed = 1;
    }

    if (specs.isEmpty() && rows.size() > 1)
    {
        buildSpecFromRow(rows.get(1).select("> td"), specs);
        consumed++;
    }

    return consumed;
}
/**
 * Matches every cell of the given row against the columns not yet assigned
 * and records a {@link Spec} for each hit. Every column is assigned to at
 * most one cell.
 *
 * @param row
 *            cells of the header row
 * @param specs
 *            receives the identified columns
 */
private void buildSpecFromRow(Elements row, List<Spec> specs)
{
    Set<Column> remaining = new HashSet<Column>();
    Collections.addAll(remaining, columns);

    int position = 0;
    for (Element cell : row)
    {
        Column matched = null;
        for (Column candidate : remaining)
        {
            if (candidate.matches(cell))
            {
                matched = candidate;
                break;
            }
        }

        if (matched != null)
        {
            specs.add(new Spec(matched, position));
            // a column must not be assigned twice
            remaining.remove(matched);
        }

        position++;
    }
}
/**
 * A specification is usable only if it contains both a date and a close
 * column.
 *
 * @param specs
 *            identified columns; may be null
 * @return true if a {@link DateColumn} and a {@link CloseColumn} are present
 */
private boolean isSpecValid(List<Spec> specs)
{
    if (specs == null || specs.isEmpty())
        return false;

    boolean dateSeen = false;
    boolean closeSeen = false;

    for (Spec spec : specs)
    {
        if (spec.column instanceof DateColumn)
            dateSeen = true;
        else if (spec.column instanceof CloseColumn)
            closeSeen = true;
    }

    return dateSeen && closeSeen;
}
/**
 * Creates a price from the {@code td} cells of the given row by letting
 * every identified column read the cell at its index.
 *
 * @param row
 *            table row
 * @param specs
 *            identified columns
 * @return the price, or null if the row has no {@code td} cells
 * @throws ParseException
 *             if a column cannot parse its cell
 */
private LatestSecurityPrice extractPrice(Element row, List<Spec> specs) throws ParseException
{
    Elements cells = row.select("> td"); //$NON-NLS-1$

    // row can be empty if it contains only 'th' elements
    if (cells.isEmpty())
        return null;

    LatestSecurityPrice price = new LatestSecurityPrice();
    for (Spec spec : specs)
    {
        Element cell = cells.get(spec.index);
        spec.column.read(cell, price);
    }
    return price;
}
/**
 * Test method to parse HTML tables. Every argument is loaded and the
 * extracted quotes are printed to standard out.
 *
 * @param args
 *            list of URLs and/or local files
 * @throws IOException
 *             if a local file cannot be read
 */
public static void main(String[] args) throws IOException
{
    PrintWriter writer = new PrintWriter(System.out);
    try
    {
        for (String arg : args)
            doLoad(arg, writer);
    }
    finally
    {
        // flush even if loading fails, otherwise the buffered output of
        // the sources processed so far is lost
        writer.flush();
    }
}
/**
 * Loads quotes from the given source and prints the source, the errors and
 * the quotes (date, value, low, high - separated by tabs) to the writer.
 *
 * @param source
 *            a URL if it starts with "http", otherwise the path of a local
 *            file which is read as UTF-8
 * @param writer
 *            receives the output
 * @throws IOException
 *             if the local file cannot be opened
 */
@SuppressWarnings("nls")
private static void doLoad(String source, PrintWriter writer) throws IOException
{
    writer.println("--------");
    writer.println(source);
    writer.println("--------");

    List<LatestSecurityPrice> prices = null;
    List<Exception> errors = new ArrayList<Exception>();

    if (source.startsWith("http"))
    {
        prices = new HTMLTableQuoteFeed().parseFromURL(source, errors);
    }
    else
    {
        Scanner scanner = null;
        try
        {
            scanner = new Scanner(new File(source), "UTF-8");
            // "\\A" matches the beginning of the input only, i.e. the one
            // and only token is the whole file
            scanner.useDelimiter("\\A");
            // an empty file has no token at all; next() would throw a
            // NoSuchElementException
            String html = scanner.hasNext() ? scanner.next() : "";
            prices = new HTMLTableQuoteFeed().parseFromHTML(html, errors);
        }
        finally
        {
            if (scanner != null)
                scanner.close();
        }
    }

    for (Exception error : errors)
        error.printStackTrace(writer);

    for (LatestSecurityPrice p : prices)
    {
        writer.print(Values.Date.format(p.getTime()));
        writer.print("\t");
        writer.print(Values.Quote.format(p.getValue()));
        writer.print("\t");
        writer.print(Values.Quote.format(p.getLow()));
        writer.print("\t");
        writer.println(Values.Quote.format(p.getHigh()));
    }
}
}