youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 21fbf0f955f584ad2d02608850a69a2fd74b65a6
parent 97952bdb78854bf09c688eb535dc7b67265934c1
Author: Throaway <Throaway@null.com>
Date:   Mon, 20 Mar 2017 16:29:39 -0700

[pornhub] Decode obfuscated video URL (closes #12470)

Diffstat:
M youtube_dl/extractor/pornhub.py | 37 ++++++++++++++++++++++++++++++++++---
1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
@@ -1,7 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import functools
 import itertools
+import operator
 # import os
 import re
 
@@ -129,9 +131,38 @@ class PornHubIE(InfoExtractor):
 
         tv_webpage = dl_webpage('tv')
 
-        video_url = self._search_regex(
-            r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
-            'video url', group='url')
+        encoded_url = self._search_regex(r'(var.*mediastring.*)</script>',
+            tv_webpage, 'encoded url')
+        assignments = encoded_url.split(";")
+        js_vars = {}
+
+        def parse_js_value(inp):
+            inp = re.sub(r'/\*[^*]*\*/', "", inp)
+
+            if "+" in inp:
+                inps = inp.split("+")
+                return functools.reduce(operator.concat, map(parse_js_value, inps))
+
+            inp = inp.strip()
+            if inp in js_vars:
+                return js_vars[inp]
+
+            # Hope it's a string!
+            assert inp.startswith('"') and inp.endswith('"')
+            return inp[1:-1]
+
+        for assn in assignments:
+            assn = assn.strip()
+            if len(assn) == 0:
+                continue
+
+            assert assn.startswith("var ")
+            assn = assn[4:]
+            vname, value = assn.split("=", 1)
+
+            js_vars[vname] = parse_js_value(value)
+
+        video_url = js_vars["mediastring"]
 
         title = self._search_regex(
             r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)