[comedycentral] fix extraction (closes #27905)
author Remita Amine <remitamine@gmail.com>
Thu, 21 Jan 2021 22:53:09 +0000 (23:53 +0100)
committer Remita Amine <remitamine@gmail.com>
Thu, 21 Jan 2021 22:53:09 +0000 (23:53 +0100)
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/spike.py

index d08b909a68ec2014f2021b193454b0434df69e0c..1bfa912be40e9e5181ca4d2f5ab4f17d87291b1a 100644 (file)
 from __future__ import unicode_literals
 
 from .mtv import MTVServicesInfoExtractor
-from .common import InfoExtractor
 
 
 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
-        (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
-        /(?P<title>.*)'''
+    _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
     _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
 
     _TESTS = [{
-        'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
-        'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
+        'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
+        'md5': 'b8acb347177c680ff18a292aa2166f80',
         'info_dict': {
-            'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
+            'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
             'ext': 'mp4',
-            'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
-            'description': 'After a certain point, breastfeeding becomes c**kblocking.',
-            'timestamp': 1376798400,
-            'upload_date': '20130818',
+            'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
+            'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
+            'timestamp': 1598670000,
+            'upload_date': '20200829',
         },
     }, {
-        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
+        'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
         'only_matching': True,
-    }]
-
-
-class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
-        (?:full-episodes|shows(?=/[^/]+/full-episodes))
-        /(?P<id>[^?]+)'''
-    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
-
-    _TESTS = [{
-        'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
-        'info_dict': {
-            'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
-            'title': 'November 28, 2016 - Ryan Speedo Green',
-        },
-        'playlist_count': 4,
     }, {
-        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-        mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1')
-        videos_info = self._get_videos_info(mgid)
-        return videos_info
-
-
-class ToshIE(MTVServicesInfoExtractor):
-    IE_DESC = 'Tosh.0'
-    _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
-    _FEED_URL = 'http://tosh.cc.com/feeds/mrss'
-
-    _TESTS = [{
-        'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
-        'info_dict': {
-            'description': 'Tosh asked fans to share their summer plans.',
-            'title': 'Twitter Users Share Summer Plans',
-        },
-        'playlist': [{
-            'md5': 'f269e88114c1805bb6d7653fecea9e06',
-            'info_dict': {
-                'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
-                'ext': 'mp4',
-                'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
-                'description': 'Tosh asked fans to share their summer plans.',
-                'thumbnail': r're:^https?://.*\.jpg',
-                # It's really reported to be published on year 2077
-                'upload_date': '20770610',
-                'timestamp': 3390510600,
-                'subtitles': {
-                    'en': 'mincount:3',
-                },
-            },
-        }]
-    }, {
-        'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
+        'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
         'only_matching': True,
     }]
 
 
 class ComedyCentralTVIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
     _TESTS = [{
-        'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
+        'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
         'info_dict': {
-            'id': 'local_playlist-f99b626bdfe13568579a',
-            'ext': 'flv',
-            'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
+            'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
+            'ext': 'mp4',
+            'title': 'Josh Investigates',
+            'description': 'Steht uns das Ende der Welt bevor?',
         },
-    }, {
-        'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
-        'only_matching': True,
     }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        mrss_url = self._search_regex(
-            r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
-            webpage, 'mrss url', group='url')
-
-        return self._get_videos_info_from_url(mrss_url, video_id)
-
-
-class ComedyCentralShortnameIE(InfoExtractor):
-    _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
-    _TESTS = [{
-        'url': ':tds',
-        'only_matching': True,
-    }, {
-        'url': ':thedailyshow',
-        'only_matching': True,
-    }, {
-        'url': ':theopposition',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        shortcut_map = {
-            'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
-            'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
-            'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
+    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+    _GEO_COUNTRIES = ['DE']
+
+    def _get_feed_query(self, uri):
+        return {
+            'accountOverride': 'intl.mtvi.com',
+            'arcEp': 'web.cc.tv',
+            'ep': 'b9032c3a',
+            'imageEp': 'web.cc.tv',
+            'mgid': uri,
         }
-        return self.url_result(shortcut_map[video_id])
index 52b8db0f9b5f2e156e3d8b54fe317dae6aaebe9a..ef57f555625e7a8bb3aaaaf90ecc13b71ab02c45 100644 (file)
@@ -235,11 +235,8 @@ from .cnn import (
 )
 from .coub import CoubIE
 from .comedycentral import (
-    ComedyCentralFullEpisodesIE,
     ComedyCentralIE,
-    ComedyCentralShortnameIE,
     ComedyCentralTVIE,
-    ToshIE,
 )
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .commonprotocols import (
index df1034fc5751dd715b31a5ac6c92d36f410da6c9..f5e30d22d420006eccfa5e5b29409fd93ffc0ec2 100644 (file)
@@ -253,6 +253,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
 
         return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
 
+    @staticmethod
+    def _extract_child_with_type(parent, t):
+        return next(c for c in parent['children'] if c.get('type') == t)
+
     def _extract_mgid(self, webpage):
         try:
             # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
@@ -278,6 +282,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if not mgid:
             mgid = self._extract_triforce_mgid(webpage)
 
+        if not mgid:
+            data = self._parse_json(self._search_regex(
+                r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
+            main_container = self._extract_child_with_type(data, 'MainContainer')
+            video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
+            mgid = video_player['props']['media']['video']['config']['uri']
+
         return mgid
 
     def _real_extract(self, url):
@@ -349,18 +360,6 @@ class MTVIE(MTVServicesInfoExtractor):
         'only_matching': True,
     }]
 
-    @staticmethod
-    def extract_child_with_type(parent, t):
-        children = parent['children']
-        return next(c for c in children if c.get('type') == t)
-
-    def _extract_mgid(self, webpage):
-        data = self._parse_json(self._search_regex(
-            r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
-        main_container = self.extract_child_with_type(data, 'MainContainer')
-        video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
-        return video_player['props']['media']['video']['config']['uri']
-
 
 class MTVJapanIE(MTVServicesInfoExtractor):
     IE_NAME = 'mtvjapan'
index 4c5e3f7c25d4e78e461e2ad29e266cab5e0041b4..5805f3d4454030e0111178876607765321cca912 100644 (file)
@@ -20,9 +20,6 @@ class BellatorIE(MTVServicesInfoExtractor):
     _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
     _GEO_COUNTRIES = ['US']
 
-    def _extract_mgid(self, webpage):
-        return self._extract_triforce_mgid(webpage)
-
 
 class ParamountNetworkIE(MTVServicesInfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
@@ -46,16 +43,6 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
     def _get_feed_query(self, uri):
         return {
             'arcEp': 'paramountnetwork.com',
+            'imageEp': 'paramountnetwork.com',
             'mgid': uri,
         }
-
-    def _extract_mgid(self, webpage):
-        root_data = self._parse_json(self._search_regex(
-            r'window\.__DATA__\s*=\s*({.+})',
-            webpage, 'data'), None)
-
-        def find_sub_data(data, data_type):
-            return next(c for c in data['children'] if c.get('type') == data_type)
-
-        c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
-        return c['props']['media']['video']['config']['uri']