echomsk.py (1317B)
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


# Extractor for audio pages under echo.msk.ru/sounds/<numeric id>.
class EchoMskIE(InfoExtractor):
    # The numeric path component is captured as the named group "id".
    _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'

    # Sample page together with the metadata expected for it.
    _TEST = {
        'url': 'http://www.echo.msk.ru/sounds/1464134.html',
        'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
        'info_dict': {
            'id': '1464134',
            'ext': 'mp3',
            'title': 'Особое мнение - 29 декабря 2014, 19:08',
        },
    }

    def _real_extract(self, url):
        # Build the info dict (id, url, title) for a single sounds page.
        #
        # The audio link and the programme name are looked up with the
        # default (mandatory) settings; the air date is optional and is
        # appended to the title only when it is found and non-empty.
        sound_id = self._match_id(url)
        page = self._download_webpage(url, sound_id)

        # Mandatory lookups, kept in the original order: MP3 link first,
        # then the programme name taken from the "/programs/..." anchor.
        mp3_url = self._search_regex(
            r'<a rel="mp3" href="([^"]+)">', page, 'audio URL')
        programme = self._html_search_regex(
            r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
            page, 'title')

        # Optional lookup: falls back to None when the date block is absent.
        date_text = self._html_search_regex(
            r'(?s)<div class="date">(.+?)</div>',
            page, 'date', fatal=False, default=None)

        full_title = programme
        if date_text:
            # Squeeze each run of one repeated whitespace character down to
            # a single occurrence of that character.
            squeezed = re.sub(r'(\s)\1+', r'\1', date_text)
            if squeezed:
                full_title = '%s - %s' % (programme, squeezed)

        info = {
            'id': sound_id,
            'url': mp3_url,
            'title': full_title,
        }
        return info