commit b03d0d064c0e198aa281faacb2b5a74af7628b74
parent d8d6148628b972b6998a8c2a5465f031a44f4004
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date: Thu, 28 Nov 2013 13:49:00 +0100
[imdb] Fix extraction in Python 2.6
Using a regular expression because the HTML cannot be parsed.
Diffstat:
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
@@ -38,8 +38,9 @@ class ImdbIE(InfoExtractor):
format_page = self._download_webpage(
compat_urlparse.urljoin(url, f_path),
u'Downloading info for %s format' % f_id)
- json_data = get_element_by_attribute('class', 'imdb-player-data',
- format_page)
+ json_data = self._search_regex(
+ r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
+ format_page, u'json data', flags=re.DOTALL)
info = json.loads(json_data)
format_info = info['videoPlayerObject']['video']
formats.append({