[nuevo] Generalize nuevo extractor and add support for trollvids - youtube-dl

commit d570746e45cff3c0f89654bf748e44a5da75a924
parent 4fcd9d147df9b06d954b8f8a1749b50609529ed4
Author: Andrew "Akari" Alexeyew <akari@dbc.1gb.ua>
Date:   Wed,  2 Dec 2015 06:00:47 +0200

[nuevo] Generalize nuevo extractor and add support for trollvids

Supports only the nuevo player for now (most common).

[trollvids] convert duration to an int

[trollvids] added a test

[trollvids] made flake8 shut up

Generalized the Nuevo extractor

Affects: anitube, trollvids, trutube

[nuevo] Complied with the code comments.

Diffstat:
M youtube_dl/extractor/__init__.py  | 1 +
M youtube_dl/extractor/anitube.py  | 34 ++++------------------------------
A youtube_dl/extractor/nuevo.py  | 37 +++++++++++++++++++++++++++++++++++++
A youtube_dl/extractor/trollvids.py  | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
M youtube_dl/extractor/trutube.py  | 23 +++++++----------------

5 files changed, 98 insertions(+), 46 deletions(-)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -726,6 +726,7 @@ from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
+from .trollvids import TrollvidsIE
 from .trutube import TruTubeIE
 from .tube8 import Tube8IE
 from .tubitv import TubiTvIE
diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py
@@ -2,10 +2,10 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .nuevo import NuevoBaseIE
 
 
-class AnitubeIE(InfoExtractor):
+class AnitubeIE(NuevoBaseIE):
     IE_NAME = 'anitube.se'
     _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
 
@@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor):
         key = self._search_regex(
             r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
 
-        config_xml = self._download_xml(
-            'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
-
-        video_title = config_xml.find('title').text
-        thumbnail = config_xml.find('image').text
-        duration = float(config_xml.find('duration').text)
-
-        formats = []
-        video_url = config_xml.find('file')
-        if video_url is not None:
-            formats.append({
-                'format_id': 'sd',
-                'url': video_url.text,
-            })
-        video_url = config_xml.find('filehd')
-        if video_url is not None:
-            formats.append({
-                'format_id': 'hd',
-                'url': video_url.text,
-            })
-
-        return {
-            'id': video_id,
-            'title': video_title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats
-        }
+        config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key
+        return self._extract_nuevo(config_url, video_id)
diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py
@@ -0,0 +1,37 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    float_or_none,
+    xpath_text
+)
+
+
+class NuevoBaseIE(InfoExtractor):
+    """Shared base for sites whose player is configured by a Nuevo XML file."""
+    def _extract_nuevo(self, config_url, video_id):
+        """Download the XML at config_url and return an info dict for video_id."""
+        tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip())
+
+        title = xpath_text(tree, './title')
+        if title:
+            title = title.strip()
+
+        thumbnail = xpath_text(tree, './image')
+        duration = float_or_none(xpath_text(tree, './duration'))
+
+        formats = []
+        for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')):
+            video_url = xpath_text(tree, element_name)
+            if video_url:  # skip elements that are absent or have no text
+                formats.append({'format_id': format_id, 'url': video_url})
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats
+        }
diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py
@@ -0,0 +1,49 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .nuevo import NuevoBaseIE
+
+from ..compat import (
+    compat_urllib_parse_unquote
+)
+
+import re
+
+
+class TrollvidsIE(NuevoBaseIE):
+    """Extractor for trollvids.com video pages, built on the Nuevo config."""
+    _VALID_URL = r'http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)'
+    IE_NAME = 'trollvids'
+
+    def _real_extract(self, url):
+        """Return the Nuevo info dict, with the URL slug as a fallback title."""
+        match = re.match(self._VALID_URL, url)
+        video_id = match.group('id')
+        raw_video_title = match.group('title')
+        url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title)
+        config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id
+
+        info = self._extract_nuevo(config_url, video_id)
+
+        info.update({
+            'webpage_url': url,
+            'age_limit': 18
+        })
+
+        # _extract_nuevo always sets 'title' (possibly to None), so test the value
+        if not info.get('title'):
+            info['title'] = compat_urllib_parse_unquote(raw_video_title)
+
+        return info
+
+    _TESTS = [{
+        'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
+        'md5': '1d53866b2c514b23ed69e4352fdc9839',
+        'info_dict': {
+            'id': '2349002',
+            'ext': 'mp4',
+            'title': "【MMD R-18】ガールフレンド carry_me_off",
+            'age_limit': 18,
+            'duration': 216.78,
+        },
+    }]
diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py
@@ -1,10 +1,9 @@
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
-from ..utils import xpath_text
+from .nuevo import NuevoBaseIE
 
 
-class TruTubeIE(InfoExtractor):
+class TruTubeIE(NuevoBaseIE):
     _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
@@ -22,19 +21,11 @@ class TruTubeIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id
 
-        config = self._download_xml(
-            'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
-            video_id, transform_source=lambda s: s.strip())
+        info = self._extract_nuevo(config_url, video_id)
 
-        # filehd is always 404
-        video_url = xpath_text(config, './file', 'video URL', fatal=True)
-        title = xpath_text(config, './title', 'title').strip()
-        thumbnail = xpath_text(config, './image', ' thumbnail')
+        # filehd always 404s: drop it by format id, not by its position in the list
+        info['formats'] = [f for f in info['formats'] if f.get('format_id') != 'hd']
 
-        return {
-            'id': video_id,
-            'url': video_url,
-            'title': title,
-            'thumbnail': thumbnail,
-        }
+        return info

	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log \| Files \| Refs \| README \| LICENSE

M	youtube_dl/extractor/__init__.py	\|	1	+
M	youtube_dl/extractor/anitube.py	\|	34	++++------------------------------
A	youtube_dl/extractor/nuevo.py	\|	37	+++++++++++++++++++++++++++++++++++++
A	youtube_dl/extractor/trollvids.py	\|	49	+++++++++++++++++++++++++++++++++++++++++++++++++
M	youtube_dl/extractor/trutube.py	\|	23	+++++++----------------