Merge branch 'master' of github.com:rg3/youtube-dl - youtube-dl

commit 7d11297f3f91e6ddd3f0caa5ad4dca1a40d6c820
parent 6ad4013d40e839211e2896129eed05ccd40ee963
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Wed, 29 Oct 2014 20:10:07 +0100

Merge branch 'master' of github.com:rg3/youtube-dl

Diffstat:
M test/test_utils.py  | 6 ++++--
M youtube_dl/extractor/__init__.py  | 2 +-
M youtube_dl/extractor/naver.py  | 6 ++++++
M youtube_dl/extractor/niconico.py  | 34 ++++++++++++++++++++++++++++++++++
M youtube_dl/extractor/trutube.py  | 38 +++++++++++++++++---------------------

5 files changed, 62 insertions(+), 24 deletions(-)
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -361,12 +361,14 @@ class TestUtil(unittest.TestCase):
 
     def test_compat_getenv(self):
         test_str = 'тест'
-        os.environ['YOUTUBE-DL-TEST'] = test_str.encode(get_filesystem_encoding())
+        os.environ['YOUTUBE-DL-TEST'] = (test_str if sys.version_info >= (3, 0)
+            else test_str.encode(get_filesystem_encoding()))
         self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
 
     def test_compat_expanduser(self):
         test_str = 'C:\Documents and Settings\тест\Application Data'
-        os.environ['HOME'] = test_str.encode(get_filesystem_encoding())
+        os.environ['HOME'] = (test_str if sys.version_info >= (3, 0)
+            else test_str.encode(get_filesystem_encoding()))
         self.assertEqual(compat_expanduser('~'), test_str)
 
 if __name__ == '__main__':
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -252,7 +252,7 @@ from .newstube import NewstubeIE
 from .nfb import NFBIE
 from .nfl import NFLIE
 from .nhl import NHLIE, NHLVideocenterIE
-from .niconico import NiconicoIE
+from .niconico import NiconicoIE, NiconicoPlaylistIE
 from .ninegag import NineGagIE
 from .noco import NocoIE
 from .normalboots import NormalbootsIE
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
     ExtractorError,
+    clean_html,
 )
 
 
@@ -31,6 +32,11 @@ class NaverIE(InfoExtractor):
         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
             webpage)
         if m_id is None:
+            m_error = re.search(
+                r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
+                webpage)
+            if m_error:
+                raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
             raise ExtractorError('couldn\'t extract vid and key')
         vid = m_id.group(1)
         key = m_id.group(2)
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
 from ..utils import (
@@ -146,3 +147,36 @@ class NiconicoIE(InfoExtractor):
             'duration': duration,
             'webpage_url': webpage_url,
         }
+
+
+class NiconicoPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.nicovideo.jp/mylist/27411728',
+        'info_dict': {
+            'id': '27411728',
+            'title': 'AKB48のオールナイトニッポン',
+        },
+        'playlist_mincount': 225,
+    }
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        webpage = self._download_webpage(url, list_id)
+
+        entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
+            webpage, 'entries')
+        entries = json.loads(entries_json)
+        entries = [{
+            '_type': 'url',
+            'ie_key': NiconicoIE.ie_key(),
+            'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'],
+        } for entry in entries]
+
+        return {
+            '_type': 'playlist',
+            'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
+            'id': list_id,
+            'entries': entries,
+        }
diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py
@@ -1,13 +1,12 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..utils import xpath_text
 
 
 class TruTubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
         'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
         'info_dict': {
@@ -16,29 +15,26 @@ class TruTubeIE(InfoExtractor):
             'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
             'thumbnail': 're:^http:.*\.jpg$',
         }
-    }
+    }, {
+        'url': 'https://trutube.tv/nuevo/player/embed.php?v=14880',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-        video_title = self._og_search_title(webpage).strip()
-        thumbnail = self._search_regex(
-            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)
+        config = self._download_xml(
+            'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
+            video_id, transform_source=lambda s: s.strip())
 
-        all_formats = re.finditer(
-            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
-        formats = [{
-            'format_id': m.group('key'),
-            'quality': -i,
-            'url': m.group('url'),
-        } for i, m in enumerate(all_formats)]
-        self._sort_formats(formats)
+        # filehd is always 404
+        video_url = xpath_text(config, './file', 'video URL', fatal=True)
+        title = xpath_text(config, './title', 'title')
+        thumbnail = xpath_text(config, './image', ' thumbnail')
 
         return {
             'id': video_id,
-            'title': video_title,
-            'formats': formats,
+            'url': video_url,
+            'title': title,
             'thumbnail': thumbnail,
         }

	youtube-dl — Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log | Files | Refs | README | LICENSE

M	test/test_utils.py	|	6	++++--
M	youtube_dl/extractor/__init__.py	|	2	+-
M	youtube_dl/extractor/naver.py	|	6	++++++
M	youtube_dl/extractor/niconico.py	|	34	++++++++++++++++++++++++++++++++++
M	youtube_dl/extractor/trutube.py	|	38	+++++++++++++++++---------------------