youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

franceinter.py (2215B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import month_by_name
      6 
      7 
      8 class FranceInterIE(InfoExtractor):
      9     _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
     10 
     11     _TEST = {
     12         'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
     13         'md5': '9e54d7bdb6fdc02a841007f8a975c094',
     14         'info_dict': {
     15             'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
     16             'ext': 'mp3',
     17             'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
     18             'description': 'md5:401969c5d318c061f86bda1fa359292b',
     19             'thumbnail': r're:^https?://.*\.jpg',
     20             'upload_date': '20160907',
     21         },
     22     }
     23 
     24     def _real_extract(self, url):
     25         video_id = self._match_id(url)
     26 
     27         webpage = self._download_webpage(url, video_id)
     28 
     29         video_url = self._search_regex(
     30             r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
     31             webpage, 'video url', group='url')
     32 
     33         title = self._og_search_title(webpage)
     34         description = self._og_search_description(webpage)
     35         thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
     36 
     37         upload_date_str = self._search_regex(
     38             r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
     39             webpage, 'upload date', fatal=False)
     40         if upload_date_str:
     41             upload_date_list = upload_date_str.split()
     42             upload_date_list.reverse()
     43             upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
     44             upload_date_list[2] = '%02d' % int(upload_date_list[2])
     45             upload_date = ''.join(upload_date_list)
     46         else:
     47             upload_date = None
     48 
     49         return {
     50             'id': video_id,
     51             'title': title,
     52             'description': description,
     53             'thumbnail': thumbnail,
     54             'upload_date': upload_date,
     55             'formats': [{
     56                 'url': video_url,
     57                 'vcodec': 'none',
     58             }],
     59         }