[nytimes] Add new cooking.nytimes.com extractor (#27143)
author: Jia Rong Yee <28086837+fourjr@users.noreply.github.com>
Sun, 22 Nov 2020 13:12:47 +0000 (21:12 +0800)
committer: GitHub <noreply@github.com>
Sun, 22 Nov 2020 13:12:47 +0000 (14:12 +0100)
* [nytimes] support cooking.nytimes.com, resolves #27112

Co-authored-by: remitamine <remitamine@gmail.com>
youtube_dl/extractor/extractors.py
youtube_dl/extractor/nytimes.py

index 31fb4c95a4498b4a7b598cf520db08f519dd5bfb..fb18a0563ca361700c2735b6db473be36d313b25 100644 (file)
@@ -782,6 +782,7 @@ from .ntvru import NTVRuIE
 from .nytimes import (
     NYTimesIE,
     NYTimesArticleIE,
+    NYTimesCookingIE,
 )
 from .nuvid import NuvidIE
 from .nzz import NZZIE
index fc78ca56c90d37b00c1f396aee7c896d54fb91c9..976b1c6944e771b8332e8ac85fe7a5292c972aea 100644 (file)
@@ -221,3 +221,41 @@ class NYTimesArticleIE(NYTimesBaseIE):
              r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
             webpage, 'podcast data')
         return self._extract_podcast_from_json(podcast_data, page_id, webpage)
+
+
+class NYTimesCookingIE(NYTimesBaseIE):
+    # Extractor for videos embedded in cooking.nytimes.com pages.
+    #
+    # Matches both /guides/<digits>... and /recipes/<digits>... URLs; the
+    # leading run of digits in the path segment is captured as the "id" group.
+    # Anything after the digits (the human-readable slug) is not matched.
+    _VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
+    # One sample per supported path kind (a recipe page, then a guide page),
+    # each with the metadata expected for the resulting video.
+    # Note that the expected 'id' values are the embedded video ids, not the
+    # numeric page ids that appear in the URLs.
+    # NOTE(review): presumably consumed by the project's extractor test
+    # harness -- confirm against the base class, which is outside this view.
+    _TESTS = [{
+        'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
+        'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
+        'info_dict': {
+            'id': '100000004756089',
+            'ext': 'mov',
+            'timestamp': 1479383008,
+            'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
+            'title': 'Cranberry Tart',
+            'upload_date': '20161117',
+            'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
+        },
+    }, {
+        'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
+        'md5': '4b2e8c70530a89b8d905a2b572316eb8',
+        'info_dict': {
+            'id': '100000003951728',
+            'ext': 'mov',
+            'timestamp': 1445509539,
+            'description': 'Turkey guide',
+            'upload_date': '20151022',
+            'title': 'Turkey',
+        }
+    }]
+
+    def _real_extract(self, url):
+        # Resolve a cooking page URL to its embedded video and return
+        # whatever _extract_video_from_id produces for that video.
+        # _match_id, _download_webpage, _search_regex and
+        # _extract_video_from_id are all defined outside this view
+        # (presumably on NYTimesBaseIE or its parents -- TODO confirm).
+
+        # Page id taken from the URL; used below only as the identifier
+        # passed along with the page download.
+        page_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, page_id)
+
+        # The video is referenced in the markup by a data-video-id attribute;
+        # capture the digits following its opening quote (single or double).
+        # 'video id' is the label handed to _search_regex for this lookup.
+        video_id = self._search_regex(
+            r'data-video-id=["\'](\d+)', webpage, 'video id')
+
+        # Hand off to the shared by-id extraction using the video id found in
+        # the page (not the page id from the URL).
+        return self._extract_video_from_id(video_id)