Merge branch 'UP/youtube-dl' into dl/YoutubeSearchURLIE
author pukkandan <pukkandan.ytdlp@gmail.com>
Sat, 29 Jan 2022 19:37:28 +0000 (01:07 +0530)
committer pukkandan <pukkandan.ytdlp@gmail.com>
Sat, 29 Jan 2022 19:37:28 +0000 (01:07 +0530)
1  2 
test/test_all_urls.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/youtube.py

index 0e1328ede9ecddcf589173e715a7eb7ca73d1b9c,365b66bad60a427a0267e26bf57a2779060166b2..26df356b49527ed5d863a53efb161231946a93b6
@@@ -66,19 -66,10 +66,10 @@@ class TestAllURLsMatching(unittest.Test
          self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
          self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
  
 -    def test_youtube_search_matching(self):
 -        self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
 -        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
 +    def test_youtube_search_matching(self):
 +        self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
 +        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
  
-     def test_youtube_extract(self):
-         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
-         assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
-         assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
-         assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
-         assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
-         assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
-         assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
      def test_facebook_matching(self):
          self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
          self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
Simple merge
index bbd3e80d854ceee2834eed0773e0b97c1cbecf24,62e58c13e47093f0295f3eb5460a188fdd1c7678..7cd651c677039653109bf47ba10fdca2a3c504e4
@@@ -306,79 -305,40 +305,94 @@@ class YoutubeBaseInfoExtractor(InfoExtr
          return self._parse_json(
              self._search_regex(
                  r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
-                 default='{}'), video_id, fatal=False)
+                 default='{}'), video_id, fatal=False) or {}
+     def _extract_video(self, renderer):
+         video_id = renderer['videoId']
+         title = try_get(
+             renderer,
+             (lambda x: x['title']['runs'][0]['text'],
+              lambda x: x['title']['simpleText']), compat_str)
+         description = try_get(
+             renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
+             compat_str)
+         duration = parse_duration(try_get(
+             renderer, lambda x: x['lengthText']['simpleText'], compat_str))
+         view_count_text = try_get(
+             renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
+         view_count = str_to_int(self._search_regex(
+             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
+             'view count', default=None))
+         uploader = try_get(
+             renderer,
+             (lambda x: x['ownerText']['runs'][0]['text'],
+              lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
+         return {
+             '_type': 'url',
+             'ie_key': YoutubeIE.ie_key(),
+             'id': video_id,
+             'url': video_id,
+             'title': title,
+             'description': description,
+             'duration': duration,
+             'view_count': view_count,
+             'uploader': uploader,
+         }
  
-             isr_contents = try_get(
-                 slr_contents,
-                 lambda x: x[0]['itemSectionRenderer']['contents'],
-                 list)
-             if not isr_contents:
-                 break
-             for content in isr_contents:
-                 if not isinstance(content, dict):
 +    def _search_results(self, query, params):
 +        data = {
 +            'context': {
 +                'client': {
 +                    'clientName': 'WEB',
 +                    'clientVersion': '2.20201021.03.00',
 +                }
 +            },
 +            'query': query,
 +        }
 +        if params:
 +            data['params'] = params
 +        for page_num in itertools.count(1):
 +            search = self._download_json(
 +                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 +                video_id='query "%s"' % query,
 +                note='Downloading page %s' % page_num,
 +                errnote='Unable to download API page', fatal=False,
 +                data=json.dumps(data).encode('utf8'),
 +                headers={'content-type': 'application/json'})
 +            if not search:
 +                break
 +            slr_contents = try_get(
 +                search,
 +                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
 +                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
 +                list)
 +            if not slr_contents:
 +                break
-                 video = content.get('videoRenderer')
-                 if not isinstance(video, dict):
-                     continue
-                 video_id = video.get('videoId')
-                 if not video_id:
-                     continue
-                 title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
-                 description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
-                 duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
-                 view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
-                 view_count = int_or_none(self._search_regex(
-                     r'^(\d+)', re.sub(r'\s', '', view_count_text),
-                     'view count', default=None))
-                 uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
-                 yield {
-                     '_type': 'url_transparent',
-                     'ie_key': YoutubeIE.ie_key(),
-                     'id': video_id,
-                     'url': video_id,
-                     'title': title,
-                     'description': description,
-                     'duration': duration,
-                     'view_count': view_count,
-                     'uploader': uploader,
-                 }
++            for slr_content in slr_contents:
++                isr_contents = try_get(
++                    slr_content,
++                    lambda x: x['itemSectionRenderer']['contents'],
++                    list)
++                if not isr_contents:
 +                    continue
-                 lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
++                for content in isr_contents:
++                    if not isinstance(content, dict):
++                        continue
++                    video = content.get('videoRenderer')
++                    if not isinstance(video, dict):
++                        continue
++                    video_id = video.get('videoId')
++                    if not video_id:
++                        continue
++                    yield self._extract_video(video)
 +            token = try_get(
 +                slr_contents,
++                lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
 +                compat_str)
 +            if not token:
 +                break
 +            data['continuation'] = token
 +
  
  class YoutubeIE(YoutubeBaseInfoExtractor):
      IE_DESC = 'YouTube.com'
@@@ -2523,9 -2018,9 +2072,9 @@@ class YoutubeTabIE(YoutubeBaseInfoExtra
                              invidio\.us
                          )/
                          (?:
-                             (?:channel|c|user|feed)/|
+                             (?:channel|c|user|feed|hashtag)/|
                              (?:playlist|watch)\?.*?\blist=|
 -                            (?!(?:watch|embed|v|e)\b)
 +                            (?!(?:watch|embed|v|e|results)\b)
                          )
                          (?P<id>[^/?\#&]+)
                      '''