From f0a05a55c2ee512880546c056cfbec5ad3399798 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 17 Jan 2022 03:22:32 +0000 Subject: [PATCH] NJoy: improve extraction of NDR id, description, etc with current page formats --- youtube_dl/extractor/ndr.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index a0d553f00..0a723e3b0 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -196,18 +196,25 @@ class NJoyIE(NDRBaseIE): 'only_matching': True, }] - def _extract_embed(self, webpage, display_id): + def _extract_embed(self, webpage, display_id, url=None): + # find tell-tale URL with the actual ID, or ... video_id = self._search_regex( - r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id') - description = self._search_regex( + (r'''\bsrc\s*=\s*(?:"|')?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''', + r'<iframe[^>]+id="pp_([\da-z]+)"', ), + webpage, 'NDR id', default=None) + + description = ( + self._html_search_meta('description', webpage) + or self._search_regex( r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>', - webpage, 'description', fatal=False) + webpage, 'description', fatal=False)) return { '_type': 'url_transparent', 'ie_key': 'NDREmbedBase', 'url': 'ndr:%s' % video_id, 'display_id': display_id, 'description': description, + 'title': display_id.replace('-', ' ').strip(), } -- 2.22.2