[generic] Automatic detection of flow player and age_limit (Fixes #3576) - youtube-dl

commit 4d805e063c6c4ffd557d7c7cb905a3ed9c926b08
parent 24e5e24166821ce36bad3e6cb0afe109c26b075d
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Sun, 24 Aug 2014 05:31:32 +0200

[generic] Automatic detection of flow player and age_limit (Fixes #3576)

Diffstat:
M youtube_dl/extractor/generic.py  | 32 ++++++++++++++++++++++++++++++++

1 file changed, 32 insertions(+), 0 deletions(-)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -331,6 +331,18 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
             }
+        },
+        # Flowplayer
+        {
+            'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
+            'md5': '9d65602bf31c6e20014319c7d07fba27',
+            'info_dict': {
+                'id': '5123ea6d5e5a7',
+                'ext': 'mp4',
+                'age_limit': 18,
+                'uploader': 'www.handjobhub.com',
+                'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
+            }
         }
     ]
 
@@ -570,6 +582,16 @@ class GenericIE(InfoExtractor):
             r'(?s)<title>(.*?)</title>', webpage, 'video title',
             default='video')
 
+        # Try to detect age limit automatically
+        age_limit = self._rta_search(webpage)
+        # And then there are the jokers who advertise that they use RTA,
+        # but actually don't.
+        AGE_LIMIT_MARKERS = [
+            r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
+        ]
+        if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
+            age_limit = 18
+
         # video uploader is domain name
         video_uploader = self._search_regex(
             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
@@ -834,6 +856,15 @@ class GenericIE(InfoExtractor):
             # Broaden the findall a little bit: JWPlayer JS loader
             found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
         if not found:
+            # Flow player
+            found = re.findall(r'''(?xs)
+                flowplayer\("[^"]+",\s*
+                    \{[^}]+?\}\s*,
+                    \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
+                        ["']?url["']?\s*:\s*["']([^"']+)["']
+            ''', webpage)
+            assert found
+        if not found:
             # Try to find twitter cards info
             found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
         if not found:
@@ -884,6 +915,7 @@ class GenericIE(InfoExtractor):
                 'url': video_url,
                 'uploader': video_uploader,
                 'title': video_title,
+                'age_limit': age_limit,
             })
 
         if len(entries) == 1:

	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log \| Files \| Refs \| README \| LICENSE