youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 7d11297f3f91e6ddd3f0caa5ad4dca1a40d6c820
parent 6ad4013d40e839211e2896129eed05ccd40ee963
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Wed, 29 Oct 2014 20:10:07 +0100

Merge branch 'master' of github.com:rg3/youtube-dl

Diffstat:
M test/test_utils.py | 6 ++++--
M youtube_dl/extractor/__init__.py | 2 +-
M youtube_dl/extractor/naver.py | 6 ++++++
M youtube_dl/extractor/niconico.py | 34 ++++++++++++++++++++++++++++++++++
M youtube_dl/extractor/trutube.py | 38 +++++++++++++++++---------------------
5 files changed, 62 insertions(+), 24 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py @@ -361,12 +361,14 @@ class TestUtil(unittest.TestCase): def test_compat_getenv(self): test_str = 'тест' - os.environ['YOUTUBE-DL-TEST'] = test_str.encode(get_filesystem_encoding()) + os.environ['YOUTUBE-DL-TEST'] = (test_str if sys.version_info >= (3, 0) + else test_str.encode(get_filesystem_encoding())) self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) def test_compat_expanduser(self): test_str = 'C:\Documents and Settings\тест\Application Data' - os.environ['HOME'] = test_str.encode(get_filesystem_encoding()) + os.environ['HOME'] = (test_str if sys.version_info >= (3, 0) + else test_str.encode(get_filesystem_encoding())) self.assertEqual(compat_expanduser('~'), test_str) if __name__ == '__main__': diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py @@ -252,7 +252,7 @@ from .newstube import NewstubeIE from .nfb import NFBIE from .nfl import NFLIE from .nhl import NHLIE, NHLVideocenterIE -from .niconico import NiconicoIE +from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninegag import NineGagIE from .noco import NocoIE from .normalboots import NormalbootsIE diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( compat_urllib_parse, ExtractorError, + clean_html, ) @@ -31,6 +32,11 @@ class NaverIE(InfoExtractor): m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', webpage) if m_id is None: + m_error = re.search( + r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>', + webpage) + if m_error: + raise ExtractorError(clean_html(m_error.group('msg')), expected=True) raise ExtractorError('couldn\'t extract vid and key') vid = m_id.group(1) key = m_id.group(2) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re 
+import json from .common import InfoExtractor from ..utils import ( @@ -146,3 +147,36 @@ class NiconicoIE(InfoExtractor): 'duration': duration, 'webpage_url': webpage_url, } + + +class NiconicoPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.nicovideo.jp/mylist/27411728', + 'info_dict': { + 'id': '27411728', + 'title': 'AKB48のオールナイトニッポン', + }, + 'playlist_mincount': 225, + } + + def _real_extract(self, url): + list_id = self._match_id(url) + webpage = self._download_webpage(url, list_id) + + entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);', + webpage, 'entries') + entries = json.loads(entries_json) + entries = [{ + '_type': 'url', + 'ie_key': NiconicoIE.ie_key(), + 'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'], + } for entry in entries] + + return { + '_type': 'playlist', + 'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'), + 'id': list_id, + 'entries': entries, + } diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py @@ -1,13 +1,12 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import xpath_text class TruTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', 'md5': 'c5b6e301b0a2040b074746cbeaa26ca1', 'info_dict': { @@ -16,29 +15,26 @@ class TruTubeIE(InfoExtractor): 'title': 'Ramses II - Proven To Be A Red Headed Caucasoid', 'thumbnail': 're:^http:.*\.jpg$', } - } + }, { + 'url': 'https://trutube.tv/nuevo/player/embed.php?v=14880', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) - webpage 
= self._download_webpage(url, video_id) - video_title = self._og_search_title(webpage).strip() - thumbnail = self._search_regex( - r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False) + config = self._download_xml( + 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id, + video_id, transform_source=lambda s: s.strip()) - all_formats = re.finditer( - r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage) - formats = [{ - 'format_id': m.group('key'), - 'quality': -i, - 'url': m.group('url'), - } for i, m in enumerate(all_formats)] - self._sort_formats(formats) + # filehd is always 404 + video_url = xpath_text(config, './file', 'video URL', fatal=True) + title = xpath_text(config, './title', 'title') + thumbnail = xpath_text(config, './image', ' thumbnail') return { 'id': video_id, - 'title': video_title, - 'formats': formats, + 'url': video_url, + 'title': title, 'thumbnail': thumbnail, }