youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

groupon.py (2599B)


      1 from __future__ import unicode_literals
      2 
      3 from .common import InfoExtractor
      4 
      5 
      6 class GrouponIE(InfoExtractor):
      7     _VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)'
      8 
      9     _TEST = {
     10         'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
     11         'info_dict': {
     12             'id': 'bikram-yoga-huntington-beach-2',
     13             'title': '$49 for 10 Yoga Classes or One Month of Unlimited Classes at Bikram Yoga Huntington Beach ($180 Value)',
     14             'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
     15         },
     16         'playlist': [{
     17             'md5': '42428ce8a00585f9bc36e49226eae7a1',
     18             'info_dict': {
     19                 'id': 'fk6OhWpXgIQ',
     20                 'ext': 'mp4',
     21                 'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf',
     22                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
     23                 'duration': 45,
     24                 'upload_date': '20160405',
     25                 'uploader_id': 'groupon',
     26                 'uploader': 'Groupon',
     27             },
     28             'add_ie': ['Youtube'],
     29         }],
     30         'params': {
     31             'skip_download': True,
     32         },
     33     }
     34 
     35     _PROVIDERS = {
     36         'ooyala': ('ooyala:%s', 'Ooyala'),
     37         'youtube': ('%s', 'Youtube'),
     38     }
     39 
     40     def _real_extract(self, url):
     41         playlist_id = self._match_id(url)
     42         webpage = self._download_webpage(url, playlist_id)
     43 
     44         payload = self._parse_json(self._search_regex(
     45             r'(?:var\s+|window\.)payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
     46         videos = payload['carousel'].get('dealVideos', [])
     47         entries = []
     48         for v in videos:
     49             provider = v.get('provider')
     50             video_id = v.get('media') or v.get('id') or v.get('baseURL')
     51             if not provider or not video_id:
     52                 continue
     53             url_pattern, ie_key = self._PROVIDERS.get(provider.lower())
     54             if not url_pattern:
     55                 self.report_warning(
     56                     '%s: Unsupported video provider %s, skipping video' %
     57                     (playlist_id, provider))
     58                 continue
     59             entries.append(self.url_result(url_pattern % video_id, ie_key))
     60 
     61         return {
     62             '_type': 'playlist',
     63             'id': playlist_id,
     64             'entries': entries,
     65             'title': self._og_search_title(webpage),
     66             'description': self._og_search_description(webpage),
     67         }