[generic] Fix RSS itunes thumbnail extraction (#27405)
author renalid <renalid@gmail.com>
Sat, 19 Dec 2020 16:18:51 +0000 (17:18 +0100)
committer GitHub <noreply@github.com>
Sat, 19 Dec 2020 16:18:51 +0000 (23:18 +0700)
youtube_dl/extractor/generic.py

index 80ecaf7950006e283762cefcfc400f8ba45f8430..ab24572e49483f877c319c515352010344c3009c 100644 (file)
@@ -35,6 +35,7 @@ from ..utils import (
     unsmuggle_url,
     UnsupportedError,
     url_or_none,
+    xpath_attr,
     xpath_text,
     xpath_with_ns,
 )
@@ -217,6 +218,30 @@ class GenericIE(InfoExtractor):
                 },
             }],
         },
+        # RSS feed with item with description and thumbnails
+        {
+            'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
+            'info_dict': {
+                'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
+                'title': 're:.*100% Hydrogen.*',
+                'description': 're:.*In this episode.*',
+            },
+            'playlist': [{
+                'info_dict': {
+                    'ext': 'm4a',
+                    'id': 'c1c879525ce2cb640b344507e682c36d',
+                    'title': 're:Hydrogen!',
+                    'description': 're:.*In this episode we are going.*',
+                    'timestamp': int,
+                    'upload_date': '20190908',
+                    'duration': int,
+                    'thumbnail': r're:^https?://.*\.jpg$',
+                },
+            }],
+            'params': {
+                'skip_download': True,
+            },
+        },
         # RSS feed with enclosures and unsupported link URLs
         {
             'url': 'http://www.hellointernet.fm/podcast?format=rss',
@@ -2234,7 +2259,7 @@ class GenericIE(InfoExtractor):
                 'timestamp': unified_timestamp(
                     xpath_text(it, 'pubDate', default=None)),
                 'duration': int_or_none(duration) or parse_duration(duration),
-                'thumbnail': url_or_none(itunes('image')),
+                'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
                 'episode': itunes('title'),
                 'episode_number': int_or_none(itunes('episode')),
                 'season_number': int_or_none(itunes('season')),