youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit ecfef3e5bf1bea8a9881b950b4239a0e1b09d10e
parent 3d3538e422a711aab238f4d1ab667d72cc9bbdbf
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Tue,  7 Jan 2014 09:41:13 +0100

+unicode_literals

Diffstat:
M test/test_playlists.py       | 62 ++++++++++++++++++++++++++++++++------------------------------
M youtube_dl/extractor/imdb.py | 38 ++++++++++++++++++++------------------
2 files changed, 52 insertions(+), 48 deletions(-)

diff --git a/test/test_playlists.py b/test/test_playlists.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 # encoding: utf-8
 
+from __future__ import unicode_literals
+
 # Allow direct execution
 import os
 import sys
@@ -43,7 +45,7 @@ class TestPlaylists(unittest.TestCase):
         ie = DailymotionPlaylistIE(dl)
         result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], u'SPORT')
+        self.assertEqual(result['title'], 'SPORT')
         self.assertTrue(len(result['entries']) > 20)
 
     def test_dailymotion_user(self):
@@ -51,7 +53,7 @@ class TestPlaylists(unittest.TestCase):
         ie = DailymotionUserIE(dl)
         result = ie.extract('http://www.dailymotion.com/user/generation-quoi/')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], u'Génération Quoi')
+        self.assertEqual(result['title'], 'Génération Quoi')
         self.assertTrue(len(result['entries']) >= 26)
 
     def test_vimeo_channel(self):
@@ -59,7 +61,7 @@ class TestPlaylists(unittest.TestCase):
         ie = VimeoChannelIE(dl)
         result = ie.extract('http://vimeo.com/channels/tributes')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], u'Vimeo Tributes')
+        self.assertEqual(result['title'], 'Vimeo Tributes')
         self.assertTrue(len(result['entries']) > 24)
 
     def test_vimeo_user(self):
@@ -67,7 +69,7 @@ class TestPlaylists(unittest.TestCase):
         ie = VimeoUserIE(dl)
         result = ie.extract('http://vimeo.com/nkistudio/videos')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], u'Nki')
+        self.assertEqual(result['title'], 'Nki')
         self.assertTrue(len(result['entries']) > 65)
 
     def test_vimeo_album(self):
@@ -75,7 +77,7 @@ class TestPlaylists(unittest.TestCase):
         ie = VimeoAlbumIE(dl)
         result = ie.extract('http://vimeo.com/album/2632481')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], u'Staff Favorites: November 2013')
+        self.assertEqual(result['title'], 'Staff Favorites: November 2013')
         self.assertTrue(len(result['entries']) > 12)
 
     def test_vimeo_groups(self):
@@ -83,7 +85,7 @@ class TestPlaylists(unittest.TestCase):
         ie = VimeoGroupsIE(dl)
         result = ie.extract('http://vimeo.com/groups/rolexawards')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], u'Rolex Awards for Enterprise')
+        self.assertEqual(result['title'], 'Rolex Awards for Enterprise')
         self.assertTrue(len(result['entries']) > 72)
 
     def test_ustream_channel(self):
@@ -91,7 +93,7 @@ class TestPlaylists(unittest.TestCase):
         ie = UstreamChannelIE(dl)
         result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], u'5124905')
+        self.assertEqual(result['id'], '5124905')
         self.assertTrue(len(result['entries']) >= 11)
 
     def test_soundcloud_set(self):
@@ -99,7 +101,7 @@ class TestPlaylists(unittest.TestCase):
         ie = SoundcloudSetIE(dl)
         result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], u'The Royal Concept EP')
+        self.assertEqual(result['title'], 'The Royal Concept EP')
         self.assertTrue(len(result['entries']) >= 6)
 
     def test_soundcloud_user(self):
@@ -107,7 +109,7 @@ class TestPlaylists(unittest.TestCase):
         ie = SoundcloudUserIE(dl)
         result = ie.extract('https://soundcloud.com/the-concept-band')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], u'9615865')
+        self.assertEqual(result['id'], '9615865')
         self.assertTrue(len(result['entries']) >= 12)
 
     def test_livestream_event(self):
@@ -115,7 +117,7 @@ class TestPlaylists(unittest.TestCase):
         ie = LivestreamIE(dl)
         result = ie.extract('http://new.livestream.com/tedx/cityenglish')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], u'TEDCity2.0 (English)')
+        self.assertEqual(result['title'], 'TEDCity2.0 (English)')
         self.assertTrue(len(result['entries']) >= 4)
 
     def test_nhl_videocenter(self):
@@ -123,8 +125,8 @@ class TestPlaylists(unittest.TestCase):
         ie = NHLVideocenterIE(dl)
         result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], u'999')
-        self.assertEqual(result['title'], u'Highlights')
+        self.assertEqual(result['id'], '999')
+        self.assertEqual(result['title'], 'Highlights')
         self.assertEqual(len(result['entries']), 12)
 
     def test_bambuser_channel(self):
@@ -132,7 +134,7 @@ class TestPlaylists(unittest.TestCase):
         ie = BambuserChannelIE(dl)
         result = ie.extract('http://bambuser.com/channel/pixelversity')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], u'pixelversity')
+        self.assertEqual(result['title'], 'pixelversity')
         self.assertTrue(len(result['entries']) >= 60)
 
     def test_bandcamp_album(self):
@@ -140,7 +142,7 @@ class TestPlaylists(unittest.TestCase):
         ie = BandcampAlbumIE(dl)
         result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], u'Nightmare Night EP')
+        self.assertEqual(result['title'], 'Nightmare Night EP')
         self.assertTrue(len(result['entries']) >= 4)
 
     def test_smotri_community(self):
@@ -148,8 +150,8 @@ class TestPlaylists(unittest.TestCase):
         ie = SmotriCommunityIE(dl)
         result = ie.extract('http://smotri.com/community/video/kommuna')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], u'kommuna')
-        self.assertEqual(result['title'], u'КПРФ')
+        self.assertEqual(result['id'], 'kommuna')
+        self.assertEqual(result['title'], 'КПРФ')
         self.assertTrue(len(result['entries']) >= 4)
 
     def test_smotri_user(self):
@@ -157,17 +159,17 @@ class TestPlaylists(unittest.TestCase):
         ie = SmotriUserIE(dl)
         result = ie.extract('http://smotri.com/user/inspector')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], u'inspector')
-        self.assertEqual(result['title'], u'Inspector')
+        self.assertEqual(result['id'], 'inspector')
+        self.assertEqual(result['title'], 'Inspector')
         self.assertTrue(len(result['entries']) >= 9)
 
     def test_AcademicEarthCourse(self):
         dl = FakeYDL()
         ie = AcademicEarthCourseIE(dl)
-        result = ie.extract(u'http://academicearth.org/courses/building-dynamic-websites/')
+        result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], u'building-dynamic-websites')
-        self.assertEqual(result['title'], u'Building Dynamic Websites')
+        self.assertEqual(result['id'], 'building-dynamic-websites')
+        self.assertEqual(result['title'], 'Building Dynamic Websites')
         self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
         self.assertEqual(len(result['entries']), 10)
 
@@ -176,8 +178,8 @@ class TestPlaylists(unittest.TestCase):
         ie = IviCompilationIE(dl)
         result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], u'dezhurnyi_angel')
-        self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012)')
+        self.assertEqual(result['id'], 'dezhurnyi_angel')
+        self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
         self.assertTrue(len(result['entries']) >= 36)
 
     def test_ivi_compilation_season(self):
@@ -185,8 +187,8 @@ class TestPlaylists(unittest.TestCase):
         ie = IviCompilationIE(dl)
         result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], u'dezhurnyi_angel/season2')
-        self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
+        self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
+        self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон')
         self.assertTrue(len(result['entries']) >= 20)
 
     def test_imdb_list(self):
@@ -194,8 +196,8 @@ class TestPlaylists(unittest.TestCase):
         ie = ImdbListIE(dl)
         result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], u'sMjedvGDd8U')
-        self.assertEqual(result['title'], u'Animated and Family Films')
+        self.assertEqual(result['id'], 'sMjedvGDd8U')
+        self.assertEqual(result['title'], 'Animated and Family Films')
         self.assertTrue(len(result['entries']) >= 48)
 
     def test_khanacademy_topic(self):
@@ -203,9 +205,9 @@ class TestPlaylists(unittest.TestCase):
         ie = KhanAcademyIE(dl)
         result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], u'cryptography')
-        self.assertEqual(result['title'], u'Journey into cryptography')
-        self.assertEqual(result['description'], u'How have humans protected their secret messages through history? What has changed today?')
+        self.assertEqual(result['id'], 'cryptography')
+        self.assertEqual(result['title'], 'Journey into cryptography')
+        self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
         self.assertTrue(len(result['entries']) >= 3)
 
 
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json
 
@@ -9,18 +11,18 @@ from ..utils import (
 
 
 class ImdbIE(InfoExtractor):
-    IE_NAME = u'imdb'
-    IE_DESC = u'Internet Movie Database trailers'
+    IE_NAME = 'imdb'
+    IE_DESC = 'Internet Movie Database trailers'
     _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
 
     _TEST = {
-        u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
-        u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
-        u'info_dict': {
-            u'id': u'2524815897',
-            u'ext': u'mp4',
-            u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
-            u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
+        'url': 'http://www.imdb.com/video/imdb/vi2524815897',
+        'md5': '9f34fa777ade3a6e57a054fdbcb3a068',
+        'info_dict': {
+            'id': '2524815897',
+            'ext': 'mp4',
+            'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
+            'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
         }
     }
 
@@ -37,10 +39,10 @@ class ImdbIE(InfoExtractor):
             f_path = f_path.strip()
             format_page = self._download_webpage(
                 compat_urlparse.urljoin(url, f_path),
-                u'Downloading info for %s format' % f_id)
+                'Downloading info for %s format' % f_id)
             json_data = self._search_regex(
                 r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
-                format_page, u'json data', flags=re.DOTALL)
+                format_page, 'json data', flags=re.DOTALL)
             info = json.loads(json_data)
             format_info = info['videoPlayerObject']['video']
             formats.append({
@@ -56,9 +58,10 @@ class ImdbIE(InfoExtractor):
             'thumbnail': format_info['slate'],
         }
 
+
 class ImdbListIE(InfoExtractor):
-    IE_NAME = u'imdb:list'
-    IE_DESC = u'Internet Movie Database lists'
+    IE_NAME = 'imdb:list'
+    IE_DESC = 'Internet Movie Database lists'
     _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
 
     def _real_extract(self, url):
@@ -66,13 +69,13 @@ class ImdbListIE(InfoExtractor):
         list_id = mobj.group('id')
 
         # RSS XML is sometimes malformed
-        rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS')
-        list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, u'list title')
+        rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, 'Downloading list RSS')
+        list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, 'list title')
 
         # Export is independent of actual author_id, but returns 404 if no author_id is provided.
         # However, passing dummy author_id seems to be enough.
         csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id,
-                                     list_id, u'Downloading list CSV')
+                                     list_id, 'Downloading list CSV')
 
         entries = []
         for item in csv.split('\n')[1:]:
@@ -83,4 +86,4 @@ class ImdbListIE(InfoExtractor):
             if item_id.startswith('vi'):
                 entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb'))
 
-        return self.playlist_result(entries, list_id, list_title)
\ No newline at end of file
+        return self.playlist_result(entries, list_id, list_title)