[telebruxelles] Fix extraction (closes #15504)
author Sergey M․ <dstftw@gmail.com>
Mon, 5 Feb 2018 16:43:07 +0000 (23:43 +0700)
committer Sergey M․ <dstftw@gmail.com>
Mon, 5 Feb 2018 16:56:00 +0000 (23:56 +0700)
youtube_dl/extractor/telebruxelles.py

index 5886e9c1bb7e0c4e9b192480ac2cfa48118ffe2a..8c7465d063b1bfa8783e037ad645312aa71f13b6 100644 (file)
@@ -38,13 +38,14 @@ class TeleBruxellesIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         article_id = self._html_search_regex(
-            r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
+            r'<article[^>]+\bid=["\']post-(\d+)', webpage, 'article ID', default=None)
         title = self._html_search_regex(
-            r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
+            r'<h1[^>]*>(.+?)</h1>', webpage, 'title',
+            default=None) or self._og_search_title(webpage)
         description = self._og_search_description(webpage, default=None)
 
         rtmp_url = self._html_search_regex(
-            r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
+            r'file\s*:\s*"(rtmps?://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
             webpage, 'RTMP url')
         rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
         formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)