[motherless] Fix recent videos upload date extraction (closes #27661)
author cladmi <hartergaetan@gmail.com>
Wed, 16 Dec 2020 08:54:48 +0000 (09:54 +0100)
committer Sergey M․ <dstftw@gmail.com>
Tue, 5 Jan 2021 00:40:25 +0000 (07:40 +0700)
Videos less than a week old use a '20h ago' or '1d ago' format.

I kept the support for 'Ago' with an uppercase start, as it was already in the code.

youtube_dl/extractor/motherless.py

index b1615b4d8e4bce8b580942f717477e6ed57ee92e..6cc36b308fbe1d66d4c191dff468a8b70398e46e 100644 (file)
@@ -85,18 +85,27 @@ class MotherlessIE(InfoExtractor):
             or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
         age_limit = self._rta_search(webpage)
         view_count = str_to_int(self._html_search_regex(
-            (r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
+            (r'>([\d,.]+)\s+Views<',  # 1,234,567 Views
+             r'<strong>Views</strong>\s+([^<]+)<'),
             webpage, 'view count', fatal=False))
         like_count = str_to_int(self._html_search_regex(
-            (r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
+            (r'>([\d,.]+)\s+Favorites<',  # 1,234 Favorites
+             r'<strong>Favorited</strong>\s+([^<]+)<'),
             webpage, 'like count', fatal=False))
 
         upload_date = self._html_search_regex(
             (r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
+             r'class=["\']count[^>]+>(\d+[hd])\s+[aA]go<',  # 20h/1d ago
              r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
-        if 'Ago' in upload_date:
-            days = int(re.search(r'([0-9]+)', upload_date).group(1))
-            upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
+        relative = re.match(r'(\d+)([hd])$', upload_date)
+        if relative:
+            delta = int(relative.group(1))
+            unit = relative.group(2)
+            if unit == 'h':
+                delta_t = datetime.timedelta(hours=delta)
+            else:  # unit == 'd'
+                delta_t = datetime.timedelta(days=delta)
+            upload_date = (datetime.datetime.now() - delta_t).strftime('%Y%m%d')
         else:
             upload_date = unified_strdate(upload_date)