[imdb] Fix extraction in Python 2.6
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Thu, 28 Nov 2013 12:49:00 +0000 (13:49 +0100)
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Thu, 28 Nov 2013 12:49:00 +0000 (13:49 +0100)
Use a regular expression to extract the JSON data, because the HTML cannot be parsed.

youtube_dl/extractor/imdb.py

index 07e4f7d29fba010b26965e0656798bc243b809bc..520edc7d0c50a2bc227c2f143a82744f4badc628 100644 (file)
@@ -38,8 +38,9 @@ class ImdbIE(InfoExtractor):
             format_page = self._download_webpage(
                 compat_urlparse.urljoin(url, f_path),
                 u'Downloading info for %s format' % f_id)
-            json_data = get_element_by_attribute('class', 'imdb-player-data',
-                format_page)
+            json_data = self._search_regex(
+                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
+                format_page, u'json data', flags=re.DOTALL)
             info = json.loads(json_data)
             format_info = info['videoPlayerObject']['video']
             formats.append({