self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
- # def test_youtube_search_matching(self):
- # self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
- # self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+ def test_youtube_search_matching(self):  # /results?search_query=... URLs are expected to map to 'youtube:search_url'
+ self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])  # presumably assertMatch checks which extractors claim the URL -- confirm against its definition
+ self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])  # search_query placed between other query parameters
- def test_youtube_extract(self):
- assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
- assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
- assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
- assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
- assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
- assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
- assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
-
def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
return self._parse_json(
self._search_regex(
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
- default='{}'), video_id, fatal=False)
+ default='{}'), video_id, fatal=False) or {}
+
+ def _extract_video(self, renderer):  # build a 'url'-type result dict (id, title, description, duration, view_count, uploader) from one renderer dict
+ video_id = renderer['videoId']  # plain subscript: a renderer without 'videoId' raises KeyError here
+ title = try_get(  # presumably try_get returns the first getter result that is a compat_str -- confirm in utils
+ renderer,
+ (lambda x: x['title']['runs'][0]['text'],  # title given as a list of text runs
+ lambda x: x['title']['simpleText']), compat_str)  # title given as a single simpleText string
+ description = try_get(
+ renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],  # only the first run of the snippet is used
+ compat_str)
+ duration = parse_duration(try_get(
+ renderer, lambda x: x['lengthText']['simpleText'], compat_str))
+ view_count_text = try_get(
+ renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''  # fall back to '' so re.sub below always receives a string
+ view_count = str_to_int(self._search_regex(
+ r'^([\d,]+)', re.sub(r'\s', '', view_count_text),  # whitespace is removed first, then the leading run of digits/commas is captured
+ 'view count', default=None))  # NOTE(review): default=None presumably means "no match" is not an error -- confirm
+ uploader = try_get(
+ renderer,
+ (lambda x: x['ownerText']['runs'][0]['text'],  # first choice: ownerText
+ lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)  # second choice: shortBylineText
+ return {
+ '_type': 'url',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'id': video_id,
+ 'url': video_id,  # the bare video id is used as the url value
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'uploader': uploader,
+ }
- isr_contents = try_get(
- slr_contents,
- lambda x: x[0]['itemSectionRenderer']['contents'],
- list)
- if not isr_contents:
- break
- for content in isr_contents:
- if not isinstance(content, dict):
+ def _search_results(self, query, params):  # generator: pages through the search API and yields self._extract_video(...) for each videoRenderer found
+ data = {  # JSON request payload; reused (and mutated) for every page
+ 'context': {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': '2.20201021.03.00',
+ }
+ },
+ 'query': query,
+ }
+ if params:  # 'params' is only sent when a truthy value was supplied
+ data['params'] = params
+ for page_num in itertools.count(1):  # unbounded counter; the loop ends only through the break statements below
+ search = self._download_json(
+ 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ video_id='query "%s"' % query,
+ note='Downloading page %s' % page_num,
+ errnote='Unable to download API page', fatal=False,  # NOTE(review): fatal=False presumably yields a falsy result on failure -- confirm
+ data=json.dumps(data).encode('utf8'),  # payload serialized to JSON and encoded to bytes
+ headers={'content-type': 'application/json'})
+ if not search:  # stop paging when nothing usable came back
+ break
+ slr_contents = try_get(
+ search,
+ (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],  # first response shape tried
+ lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),  # second response shape tried (continuation items)
+ list)
+ if not slr_contents:
+ break
- video = content.get('videoRenderer')
- if not isinstance(video, dict):
- continue
- video_id = video.get('videoId')
- if not video_id:
- continue
- title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
- description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
- duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
- view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
- view_count = int_or_none(self._search_regex(
- r'^(\d+)', re.sub(r'\s', '', view_count_text),
- 'view count', default=None))
- uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
- yield {
- '_type': 'url_transparent',
- 'ie_key': YoutubeIE.ie_key(),
- 'id': video_id,
- 'url': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'view_count': view_count,
- 'uploader': uploader,
- }
++ for slr_content in slr_contents:  # every section entry is inspected, not just the first
++ isr_contents = try_get(
++ slr_content,
++ lambda x: x['itemSectionRenderer']['contents'],
++ list)
++ if not isr_contents:  # entries without an itemSectionRenderer list are skipped
+ continue
- lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
++ for content in isr_contents:
++ if not isinstance(content, dict):
++ continue
++ video = content.get('videoRenderer')  # only videoRenderer items produce results
++ if not isinstance(video, dict):
++ continue
++ video_id = video.get('videoId')
++ if not video_id:  # checked here because _extract_video is called with this renderer next
++ continue
++ yield self._extract_video(video)
+ token = try_get(
+ slr_contents,
++ lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],  # token is read from the last element of slr_contents
+ compat_str)
+ if not token:  # no continuation token: stop paging
+ break
+ data['continuation'] = token  # stored in the payload that the next iteration sends
+
class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com'
invidio\.us
)/
(?:
- (?:channel|c|user|feed)/|
+ (?:channel|c|user|feed|hashtag)/|
(?:playlist|watch)\?.*?\blist=|
- (?!(?:watch|embed|v|e)\b)
+ (?!(?:watch|embed|v|e|results)\b)
)
(?P<id>[^/?\#&]+)
'''