From: pukkandan Date: Sat, 29 Jan 2022 19:37:28 +0000 (+0530) Subject: Merge branch 'UP/youtube-dl' into dl/YoutubeSearchURLIE X-Git-Url: http://git.oshgnacknak.de/?a=commitdiff_plain;h=a3373da70c97d356bd4927eff403abd261dd8f9f;p=youtube-dl Merge branch 'UP/youtube-dl' into dl/YoutubeSearchURLIE --- a3373da70c97d356bd4927eff403abd261dd8f9f diff --cc test/test_all_urls.py index 0e1328ede,365b66bad..26df356b4 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@@ -66,19 -66,10 +66,10 @@@ class TestAllURLsMatching(unittest.Test self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab']) self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab']) - # def test_youtube_search_matching(self): - # self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) - # self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) + def test_youtube_search_matching(self): + self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) + self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) - def test_youtube_extract(self): - assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) - assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') - assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') - assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') - assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') - assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') - assertExtractId('BaW_jenozKc', 'BaW_jenozKc') - def test_facebook_matching(self): self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) diff --cc youtube_dl/extractor/youtube.py index bbd3e80d8,62e58c13e..7cd651c67 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@@ -306,79 -305,40 +305,94 @@@ class YoutubeBaseInfoExtractor(InfoExtr return self._parse_json( self._search_regex( r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', - default='{}'), video_id, fatal=False) + default='{}'), video_id, fatal=False) or {} + + def _extract_video(self, renderer): + video_id = renderer['videoId'] + title = try_get( + renderer, + (lambda x: x['title']['runs'][0]['text'], + lambda x: x['title']['simpleText']), compat_str) + description = try_get( + renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'], + compat_str) + duration = parse_duration(try_get( + renderer, lambda x: x['lengthText']['simpleText'], compat_str)) + view_count_text = try_get( + renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or '' + view_count = str_to_int(self._search_regex( + r'^([\d,]+)', re.sub(r'\s', '', view_count_text), + 'view count', default=None)) + uploader = try_get( + renderer, + (lambda x: x['ownerText']['runs'][0]['text'], + lambda x: x['shortBylineText']['runs'][0]['text']), compat_str) + return { + '_type': 'url', + 'ie_key': YoutubeIE.ie_key(), + 'id': video_id, + 'url': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'view_count': view_count, + 'uploader': uploader, + } + def _search_results(self, query, params): + data = { + 'context': { + 'client': { + 'clientName': 'WEB', + 'clientVersion': '2.20201021.03.00', + } + }, + 'query': query, + } + if params: + data['params'] = params + for page_num in itertools.count(1): + search = self._download_json( + 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', + video_id='query "%s"' % query, + note='Downloading page %s' % page_num, + errnote='Unable to download API page', fatal=False, + data=json.dumps(data).encode('utf8'), + headers={'content-type': 'application/json'}) + if not search: + break + slr_contents = try_get( + search, + (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], + lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), + list) + if not slr_contents: + break - isr_contents = try_get( - slr_contents, - lambda x: x[0]['itemSectionRenderer']['contents'], - list) - if not isr_contents: - break - for content in isr_contents: - if not isinstance(content, dict): ++ for slr_content in slr_contents: ++ isr_contents = try_get( ++ slr_content, ++ lambda x: x['itemSectionRenderer']['contents'], ++ list) ++ if not isr_contents: + continue - video = content.get('videoRenderer') - if not isinstance(video, dict): - continue - video_id = video.get('videoId') - if not video_id: - continue - title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str) - description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str) - duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str)) - view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or '' - view_count = int_or_none(self._search_regex( - r'^(\d+)', re.sub(r'\s', '', view_count_text), - 'view count', default=None)) - uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str) - yield { - '_type': 'url_transparent', - 'ie_key': YoutubeIE.ie_key(), - 'id': video_id, - 'url': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'view_count': view_count, - 'uploader': uploader, - } ++ for content in isr_contents: ++ if not isinstance(content, dict): ++ continue ++ video = content.get('videoRenderer') ++ if not isinstance(video, dict): ++ continue ++ video_id = video.get('videoId') ++ if not video_id: ++ continue ++ yield self._extract_video(video) + token = try_get( + slr_contents, - lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], ++ lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], + compat_str) + if not token: + break + data['continuation'] = token + class YoutubeIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com' @@@ -2523,9 -2018,9 +2072,9 @@@ class YoutubeTabIE(YoutubeBaseInfoExtra invidio\.us )/ (?: - (?:channel|c|user|feed)/| + (?:channel|c|user|feed|hashtag)/| (?:playlist|watch)\?.*?\blist=| - (?!(?:watch|embed|v|e)\b) + (?!(?:watch|embed|v|e|results)\b) ) (?P[^/?\#&]+) '''