package com.github.axet.vget.vhs;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import com.github.axet.vget.info.VGetParser;
import com.github.axet.vget.info.VideoInfo;
import com.github.axet.vget.info.VideoInfo.States;
import com.github.axet.vget.info.VideoInfo.VideoQuality;
import com.github.axet.wget.WGet;
import com.github.axet.wget.info.ex.DownloadError;
public class YouTubeParser extends VGetParser {
final static String UTF8 = "UTF-8";
/**
 * Error raised when the scraped watch page contains the
 * {@code unavailable-player} marker.
 */
public static class VideoUnavailablePlayer extends DownloadError {
    private static final long serialVersionUID = 10905065542230199L;

    /** Fixed message carried by every instance. */
    private static final String MESSAGE = "unavailable-player";

    /** Creates the error with its fixed message. */
    public VideoUnavailablePlayer() {
        super(MESSAGE);
    }
}
/**
 * Error raised when the scraped watch page asks for age verification
 * (the page contains the {@code verify_age} marker).
 */
public static class AgeException extends DownloadError {
    private static final long serialVersionUID = 1L;

    /** Fixed message carried by every instance. */
    private static final String MESSAGE = "Age restriction, account required";

    /** Creates the error with its fixed message. */
    public AgeException() {
        super(MESSAGE);
    }
}
/**
 * Error describing a private video.
 */
public static class PrivateVideoException extends DownloadError {
    private static final long serialVersionUID = 1L;

    /** Creates the error with the default message {@code "Private video"}. */
    public PrivateVideoException() {
        this("Private video");
    }

    /**
     * Creates the error with a caller-supplied message.
     *
     * @param s message passed to {@link DownloadError}
     */
    public PrivateVideoException(String s) {
        super(s);
    }
}
/**
 * Error raised when the get_video_info response reports error code 150.
 * {@code extract} catches it and falls back to scraping the watch page.
 */
public static class EmbeddingDisabled extends DownloadError {
private static final long serialVersionUID = 1L;
/**
 * @param msg
 *            message passed to {@link DownloadError}
 */
public EmbeddingDisabled(String msg) {
super(msg);
}
}
/**
 * Error raised when the get_video_info response reports error code 100.
 */
public static class VideoDeleted extends DownloadError {
private static final long serialVersionUID = 1L;
/**
 * @param msg
 *            message passed to {@link DownloadError}
 */
public VideoDeleted(String msg) {
super(msg);
}
}
/** URL this parser was created for; the video id is extracted from it. */
URL source;

/**
 * Creates a parser bound to one video URL.
 *
 * @param input
 *            URL to parse; stored as-is, no validation happens here
 */
public YouTubeParser(URL input) {
    source = input;
}
/**
 * Tells whether a URL looks like a YouTube URL.
 *
 * @param url
 *            URL to inspect
 * @return {@code true} when the textual form of the URL contains
 *         {@code "youtube.com"} anywhere
 */
public static boolean probe(URL url) {
    final String text = url.toString();
    return text.indexOf("youtube.com") >= 0;
}
/**
 * Collects the download candidates for the video.
 *
 * The embedded-player lookup is tried first; when it reports that
 * embedding is disabled, the watch page is scraped instead.
 *
 * @param info
 *            video description that is filled in while extracting
 * @param stop
 *            flag handed through to the page downloads
 * @param notify
 *            callback run whenever {@code info} changes state
 * @return the download candidates found
 * @throws RuntimeException
 *             any unchecked failure is rethrown unchanged, any checked
 *             failure is wrapped
 */
public List<VideoDownload> extract(final VideoInfo info, final AtomicBoolean stop, final Runnable notify) {
    try {
        List<VideoDownload> found;
        try {
            found = extractEmbedded(info, stop, notify);
        } catch (EmbeddingDisabled embeddingOff) {
            // embedded lookup refused: fall back to the watch page
            found = streamCpature(info, stop, notify);
        }
        return found;
    } catch (RuntimeException unchecked) {
        throw unchecked;
    } catch (Exception checked) {
        throw new RuntimeException(checked);
    }
}
/**
 * Downloads the watch page of the video and scrapes download
 * candidates, title and icon from it.
 *
 * Age restricted videos cannot be downloaded this way.
 *
 * @param info
 *            video description; supplies the page URL and receives
 *            state, title and icon updates
 * @param stop
 *            flag handed through to the page download
 * @param notify
 *            callback run after every state change of {@code info}
 * @return the download candidates found on the page
 * @throws Exception
 *             when downloading or parsing the page fails
 */
List<VideoDownload> streamCpature(final VideoInfo info, final AtomicBoolean stop, final Runnable notify)
        throws Exception {
    // mirrors download progress into info and wakes the listener
    final WGet.HtmlLoader progress = new WGet.HtmlLoader() {
        @Override
        public void notifyRetry(int delay, Throwable e) {
            info.setDelay(delay, e);
            notify.run();
        }

        @Override
        public void notifyDownloading() {
            info.setState(States.DOWNLOADING);
            notify.run();
        }

        @Override
        public void notifyMoved() {
            info.setState(States.RETRYING);
            notify.run();
        }
    };

    final String page = WGet.getHtml(info.getWeb(), progress, stop);

    final List<VideoDownload> videos = new ArrayList<VideoDownload>();
    extractHtmlInfo(videos, info, page, stop, notify);
    extractIcon(info, page);
    return videos;
}
/**
 * Adds one download candidate for a specific youtube format code.
 *
 * @param sNextVideoURL
 *            list that receives the new entry
 * @param itag
 *            format code as a plain decimal number, e.g. {@code "22"}
 * @param url
 *            download source url
 * @throws NumberFormatException
 *             if {@code itag} is not a decimal integer
 */
void addVideo(List<VideoDownload> sNextVideoURL, String itag, URL url) {
    // Integer.decode would read a leading "0" as octal and "0x"/"#" as
    // hex; format codes are plain decimal numbers.
    Integer code = Integer.valueOf(itag);
    // null when the code is not listed in itagMap; the entry is still added
    VideoQuality quality = itagMap.get(code);
    sNextVideoURL.add(new VideoDownload(quality, url));
}
// http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
/** Maps a youtube format code (itag) to the video quality it stands for. */
static final Map<Integer, VideoQuality> itagMap = createItagMap();

/**
 * Builds the itag table as a plain {@link HashMap}, so no anonymous map
 * subclass is needed to fill it.
 */
private static Map<Integer, VideoQuality> createItagMap() {
    Map<Integer, VideoQuality> table = new HashMap<Integer, VideoQuality>();
    table.put(120, VideoQuality.p720);
    table.put(102, VideoQuality.p720);
    table.put(101, VideoQuality.p360);
    table.put(100, VideoQuality.p360);
    table.put(85, VideoQuality.p520);
    table.put(84, VideoQuality.p720);
    table.put(83, VideoQuality.p240);
    table.put(82, VideoQuality.p360);
    table.put(46, VideoQuality.p1080);
    table.put(45, VideoQuality.p720);
    table.put(44, VideoQuality.p480);
    table.put(43, VideoQuality.p360);
    table.put(38, VideoQuality.p3072);
    table.put(37, VideoQuality.p1080);
    table.put(36, VideoQuality.p240);
    table.put(35, VideoQuality.p480);
    table.put(34, VideoQuality.p360);
    table.put(22, VideoQuality.p720);
    table.put(18, VideoQuality.p360);
    table.put(17, VideoQuality.p144);
    table.put(6, VideoQuality.p270);
    table.put(5, VideoQuality.p240);
    return table;
}
/** Watch page URL: first {@code v} query parameter after {@code /watch}. */
private static final Pattern WATCH_ID_PATTERN = Pattern.compile("youtube\\.com/watch.*?[?&]v=([^&#]*)");

/** Direct player URL of the form {@code /v/ID}. */
private static final Pattern EMBED_ID_PATTERN = Pattern.compile("youtube\\.com/v/([^&?#/]*)");

/**
 * Extracts the video id from a youtube URL.
 *
 * Two forms are recognised: {@code youtube.com/watch?...v=ID} and
 * {@code youtube.com/v/ID}. The id ends at the next {@code &} or
 * {@code #} (and, for the {@code /v/} form, also at {@code ?} or
 * {@code /}), so trailing parameters and fragments are not part of it.
 *
 * @param url
 *            URL to inspect
 * @return the video id, or {@code null} when the URL matches neither form
 */
public static String extractId(URL url) {
    String text = url.toString();

    // Lazy ".*?" plus the "[?&]" delimiter picks the real "v" parameter,
    // not the tail of another parameter name such as "rv=".
    Matcher watch = WATCH_ID_PATTERN.matcher(text);
    if (watch.find()) {
        return watch.group(1);
    }

    Matcher embed = EMBED_ID_PATTERN.matcher(text);
    if (embed.find()) {
        return embed.group(1);
    }

    return null;
}
/**
 * Looks the video up through the get_video_info endpoint, the way the
 * embedded player does. This allows downloading age restricted videos.
 *
 * @param info
 *            video description; receives state, title and icon updates
 * @param stop
 *            flag handed through to the download
 * @param notify
 *            callback run after every state change of {@code info}
 * @return the download candidates listed in the response
 * @throws EmbeddingDisabled
 *             when the response reports error code 150
 * @throws VideoDeleted
 *             when the response reports error code 100
 * @throws DownloadError
 *             when the response reports any other failure or carries no
 *             stream map
 * @throws Exception
 *             when the download or URL handling fails
 */
List<VideoDownload> extractEmbedded(final VideoInfo info, final AtomicBoolean stop, final Runnable notify)
        throws Exception {
    List<VideoDownload> sNextVideoURL = new ArrayList<VideoDownload>();

    String id = extractId(source);
    if (id == null) {
        throw new RuntimeException("unknown url");
    }

    // placeholder title until the response supplies the real one
    info.setTitle(String.format("http://www.youtube.com/watch?v=%s", id));

    String get = String.format("http://www.youtube.com/get_video_info?authuser=0&video_id=%s&el=embedded", id);
    URL url = new URL(get);

    String qs = WGet.getHtml(url, new WGet.HtmlLoader() {
        @Override
        public void notifyRetry(int delay, Throwable e) {
            info.setDelay(delay, e);
            notify.run();
        }

        @Override
        public void notifyDownloading() {
            info.setState(States.DOWNLOADING);
            notify.run();
        }

        @Override
        public void notifyMoved() {
            info.setState(States.RETRYING);
            notify.run();
        }
    }, stop);

    Map<String, String> map = getQueryMap(qs);

    // Constant-first comparisons: a response that lacks a key must not
    // end in a NullPointerException.
    if ("fail".equals(map.get("status"))) {
        String errorCode = map.get("errorcode");
        if ("150".equals(errorCode))
            throw new EmbeddingDisabled("error code 150");
        if ("100".equals(errorCode))
            throw new VideoDeleted("error code 100");
        String reason = map.get("reason");
        if (reason == null)
            throw new DownloadError("get_video_info failed, error code " + errorCode);
        throw new DownloadError(URLDecoder.decode(reason, UTF8));
        // throw new PrivateVideoException(r);
    }

    // NOTE(review): values are URL-decoded here in addition to whatever
    // decoding getQueryMap performs; kept exactly as before.
    String title = map.get("title");
    if (title != null) {
        info.setTitle(URLDecoder.decode(title, UTF8));
    }

    // String fmt_list = URLDecoder.decode(map.get("fmt_list"), UTF8);
    // String[] fmts = fmt_list.split(",");

    String streamMap = map.get("url_encoded_fmt_stream_map");
    if (streamMap == null) {
        throw new DownloadError("url_encoded_fmt_stream_map missing in get_video_info response");
    }
    String url_encoded_fmt_stream_map = URLDecoder.decode(streamMap, UTF8);
    extractUrlEncodedVideos(sNextVideoURL, url_encoded_fmt_stream_map);

    // 'iurlmaxres' or 'iurlsd' or 'thumbnail_url'
    String icon = map.get("thumbnail_url");
    if (icon != null) {
        info.setIcon(new URL(URLDecoder.decode(icon, UTF8)));
    }

    return sNextVideoURL;
}
/**
 * Reads the thumbnail URL from the watch page and stores it as the icon
 * of the video. Nothing happens when the page has no
 * {@code itemprop="thumbnailUrl"} link.
 *
 * @param info
 *            video description that receives the icon
 * @param html
 *            source of the watch page
 * @throws RuntimeException
 *             unchecked failures are rethrown unchanged; a checked failure
 *             (e.g. a malformed thumbnail URL) is wrapped
 */
void extractIcon(VideoInfo info, String html) {
    try {
        // [^"]* stops at the closing quote of href; a greedy ".*" would
        // run on to the last quote on the same line.
        Pattern thumbnail = Pattern.compile("itemprop=\"thumbnailUrl\" href=\"([^\"]*)\"");
        Matcher thumbnailMatch = thumbnail.matcher(html);
        if (thumbnailMatch.find()) {
            String href = StringEscapeUtils.unescapeHtml4(thumbnailMatch.group(1));
            info.setIcon(new URL(href));
        }
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
 * Splits a query string into name/value pairs.
 *
 * The trimmed string is wrapped into a query-only {@link URI} and parsed
 * with {@code URLEncodedUtils}; when a name occurs more than once the
 * last value wins.
 *
 * @param qs
 *            query string such as {@code a=1&b=2}
 * @return map from parameter name to parameter value
 * @throws RuntimeException
 *             if the string cannot be turned into a URI; the message is
 *             the trimmed query string
 */
public static Map<String, String> getQueryMap(String qs) {
    qs = qs.trim();

    final URI queryOnly;
    try {
        queryOnly = new URI(null, null, null, -1, null, qs, null);
    } catch (URISyntaxException e) {
        throw new RuntimeException(qs, e);
    }

    HashMap<String, String> result = new HashMap<String, String>();
    for (NameValuePair pair : URLEncodedUtils.parse(queryOnly, UTF8)) {
        result.put(pair.getName(), pair.getValue());
    }
    return result;
}
/**
 * Scrapes the watch page: rejects age-gated and unavailable pages,
 * collects download candidates from the embedded
 * {@code url_encoded_fmt_stream_map} and reads the title.
 *
 * @param sNextVideoURL
 *            list that receives the download candidates
 * @param info
 *            video description that receives the title
 * @param html
 *            source of the watch page
 * @param stop
 *            not used by this method
 * @param notify
 *            not used by this method
 * @throws AgeException
 *             when the page contains {@code verify_age}
 * @throws VideoUnavailablePlayer
 *             when the page contains {@code unavailable-player}
 * @throws Exception
 *             when decoding or URL construction fails
 */
void extractHtmlInfo(List<VideoDownload> sNextVideoURL, VideoInfo info, String html, AtomicBoolean stop,
        Runnable notify) throws Exception {
    if (html.contains("verify_age")) {
        throw new AgeException();
    }
    if (html.contains("unavailable-player")) {
        throw new VideoUnavailablePlayer();
    }

    Matcher mapMatch = Pattern.compile("\"url_encoded_fmt_stream_map\": \"([^\"]*)\"").matcher(html);
    if (mapMatch.find()) {
        String streamMap = mapMatch.group(1);

        // normal embedded video, unable to grab age restricted videos
        Matcher plain = Pattern.compile("url=(.*)").matcher(streamMap);
        if (plain.find()) {
            extractUrlEncodedVideos(sNextVideoURL, plain.group(1));
        }

        // stream video
        Matcher streamed = Pattern.compile("stream=(.*)").matcher(streamMap);
        if (streamed.find()) {
            Pattern rtmpLink = Pattern.compile("(sparams.*)&itag=(\\d+)&.*&conn=rtmpe(.*),");
            for (String chunk : streamed.group(1).split("stream=")) {
                Matcher linkMatch = rtmpLink.matcher(StringEscapeUtils.unescapeJava(chunk));
                if (!linkMatch.find()) {
                    continue;
                }
                String sparams = linkMatch.group(1);
                String itag = linkMatch.group(2);
                // the text after "conn=rtmpe" is prefixed with "http"
                String target = URLDecoder.decode("http" + linkMatch.group(3) + "?" + sparams, UTF8);
                addVideo(sNextVideoURL, itag, new URL(target));
            }
        }
    }

    Matcher titleMatch = Pattern.compile("<meta name=\"title\" content=(.*)").matcher(html);
    if (titleMatch.find()) {
        String name = titleMatch.group(1).replaceFirst("<meta name=\"title\" content=", "").trim();
        // drop the quotes and the closing bracket around the value
        name = StringUtils.strip(name, "\">");
        info.setTitle(StringEscapeUtils.unescapeHtml4(name));
    }
}
/**
 * Splits a stream map on {@code url=} and turns every chunk that yields
 * a URL, an itag and a signature into a download candidate. Chunks that
 * miss one of the three, or whose URL is malformed, are skipped.
 *
 * @param sNextVideoURL
 *            list that receives the download candidates
 * @param sline
 *            stream map text
 * @throws Exception
 *             when URL decoding fails
 */
void extractUrlEncodedVideos(List<VideoDownload> sNextVideoURL, String sline) throws Exception {
    final Pattern leadingField = Pattern.compile("([^&,]*)[&,]");
    final Pattern itagField = Pattern.compile("itag=(\\d+)");
    // tried in this order, first hit wins
    final Pattern[] signatureFields = { Pattern.compile("&signature=([^&,]*)"),
            Pattern.compile("sig=([^&,]*)"), Pattern.compile("[&,]s=([^&,]*)") };

    for (String chunk : sline.split("url=")) {
        String raw = StringEscapeUtils.unescapeJava(chunk);
        String decoded = URLDecoder.decode(raw, UTF8);

        // universal request: the URL is the text before the first '&' or ','
        String link = firstGroup(leadingField, raw);
        if (link != null) {
            link = URLDecoder.decode(link, UTF8);
        }

        String itag = firstGroup(itagField, decoded);

        String sig = null;
        for (int i = 0; sig == null && i < signatureFields.length; i++) {
            sig = firstGroup(signatureFields[i], decoded);
        }

        if (link == null || itag == null || sig == null) {
            continue;
        }

        try {
            addVideo(sNextVideoURL, itag, new URL(link + "&signature=" + sig));
        } catch (MalformedURLException ignored) {
            // ignore bad urls
        }
    }
}

/** Returns group 1 of the first match of {@code pattern} in {@code text}, or null when there is no match. */
private static String firstGroup(Pattern pattern, String text) {
    Matcher found = pattern.matcher(text);
    return found.find() ? found.group(1) : null;
}
}