[mlb] Fall back to extracting video id from webpage for all URLs that do not contain...

author Sergey M․ <dstftw@gmail.com>

Fri, 8 May 2015 14:07:53 +0000 (20:07 +0600)

committer Sergey M․ <dstftw@gmail.com>

Fri, 8 May 2015 14:07:53 +0000 (20:07 +0600)
author Sergey M․ <dstftw@gmail.com>
Fri, 8 May 2015 14:07:53 +0000 (20:07 +0600)
committer Sergey M․ <dstftw@gmail.com>
Fri, 8 May 2015 14:07:53 +0000 (20:07 +0600)
diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py

index ee9ff73bf22f3fbcf769b63fdcbdf96395f1db4a..109eecefd9afffb719486256354bfe8b2fc8c6e7 100644 (file)
--- a/youtube_dl/extractor/mlb.py
+++ b/youtube_dl/extractor/mlb.py
@@ -10,7 +10,21 @@ from ..utils import (
  
  
  class MLBIE(InfoExtractor):
-    _VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/(?:embed|m-internal-embed)\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/
+                        (?:
+                            (?:
+                                (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|
+                                (?:
+                                    shared/video/embed/(?:embed|m-internal-embed)\.html|
+                                    [^/]+/video/play\.jsp
+                                )\?.*?\bcontent_id=
+                            )
+                            (?P<id>n?\d+)|
+                            (?P<path>.+?)
+                        )
+                    '''
      _TESTS = [
          {
              'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@ -95,6 +109,12 @@ class MLBIE(InfoExtractor):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
  
+        if not video_id:
+            video_path = mobj.group('path')
+            webpage = self._download_webpage(url, video_path)
+            video_id = self._search_regex(
+                r'data-videoid="(\d+)"', webpage, 'video id')
+
          detail = self._download_xml(
              'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
              % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
author	Sergey M․ <dstftw@gmail.com>
	Fri, 8 May 2015 14:07:53 +0000 (20:07 +0600)
committer	Sergey M․ <dstftw@gmail.com>
	Fri, 8 May 2015 14:07:53 +0000 (20:07 +0600)