[crunchyroll] Extract uploader name that's not a link - youtube-dl

commit 7fd465525695bb589fa8932e1e36f38ad511735b
parent fd5c4aab5958a2a086072488913cc190ff028bc3
Author: xbe <xbe@users.noreply.github.com>
Date:   Sun, 26 Feb 2017 03:08:10 -0800

[crunchyroll] Extract uploader name that's not a link

Provide the Crunchyroll extractor with the ability to extract uploader
names that aren't links. Add a test for this new functionality.
This fixes #12267.
Diffstat:
M youtube_dl/extractor/crunchyroll.py  | 20 ++++++++++++++++++--

1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
@@ -207,6 +207,21 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # Just test metadata extraction
             'skip_download': True,
         },
+    }, {
+        # make sure we can extract an uploader name that's not a link
+        'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
+        'info_dict': {
+            'id': '606899',
+            'ext': 'mp4',
+            'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
+            'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
+            'uploader': 'Geneon Entertainment',
+            'upload_date': '20120717',
+        },
+        'params': {
+            # just test metadata extraction
+            'skip_download': True,
+        },
     }]
 
     _FORMAT_IDS = {
@@ -388,8 +403,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         if video_upload_date:
             video_upload_date = unified_strdate(video_upload_date)
         video_uploader = self._html_search_regex(
-            r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
-            'video_uploader', fatal=False)
+            # try looking for both an uploader that's a link and one that's not
+            [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
+            webpage, 'video_uploader', fatal=False)
 
         available_fmts = []
         for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):

	youtube-dl — Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log | Files | Refs | README | LICENSE