youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

cracked.py (3138B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from .youtube import YoutubeIE
      7 from ..utils import (
      8     parse_iso8601,
      9     str_to_int,
     10 )
     11 
     12 
     13 class CrackedIE(InfoExtractor):
     14     _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
     15     _TESTS = [{
     16         'url': 'http://www.cracked.com/video_19070_if-animal-actors-got-e21-true-hollywood-stories.html',
     17         'md5': '89b90b9824e3806ca95072c4d78f13f7',
     18         'info_dict': {
     19             'id': '19070',
     20             'ext': 'mp4',
     21             'title': 'If Animal Actors Got E! True Hollywood Stories',
     22             'timestamp': 1404954000,
     23             'upload_date': '20140710',
     24         }
     25     }, {
     26         # youtube embed
     27         'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
     28         'md5': 'ccd52866b50bde63a6ef3b35016ba8c7',
     29         'info_dict': {
     30             'id': 'EjI00A3rZD0',
     31             'ext': 'mp4',
     32             'title': "4 Plot Holes You Didn't Notice in Your Favorite Movies - The Spit Take",
     33             'description': 'md5:c603708c718b796fe6079e2b3351ffc7',
     34             'upload_date': '20140725',
     35             'uploader_id': 'Cracked',
     36             'uploader': 'Cracked',
     37         }
     38     }]
     39 
     40     def _real_extract(self, url):
     41         video_id = self._match_id(url)
     42 
     43         webpage = self._download_webpage(url, video_id)
     44 
     45         youtube_url = YoutubeIE._extract_url(webpage)
     46         if youtube_url:
     47             return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
     48 
     49         video_url = self._html_search_regex(
     50             [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
     51             webpage, 'video URL')
     52 
     53         title = self._search_regex(
     54             [r'property="?og:title"?\s+content="([^"]+)"', r'class="?title"?>([^<]+)'],
     55             webpage, 'title')
     56 
     57         description = self._search_regex(
     58             r'name="?(?:og:)?description"?\s+content="([^"]+)"',
     59             webpage, 'description', default=None)
     60 
     61         timestamp = self._html_search_regex(
     62             r'"date"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False)
     63         if timestamp:
     64             timestamp = parse_iso8601(timestamp[:-6])
     65 
     66         view_count = str_to_int(self._html_search_regex(
     67             r'<span\s+class="?views"? id="?viewCounts"?>([\d,\.]+) Views</span>',
     68             webpage, 'view count', fatal=False))
     69         comment_count = str_to_int(self._html_search_regex(
     70             r'<span\s+id="?commentCounts"?>([\d,\.]+)</span>',
     71             webpage, 'comment count', fatal=False))
     72 
     73         m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
     74         if m:
     75             width = int(m.group('width'))
     76             height = int(m.group('height'))
     77         else:
     78             width = height = None
     79 
     80         return {
     81             'id': video_id,
     82             'url': video_url,
     83             'title': title,
     84             'description': description,
     85             'timestamp': timestamp,
     86             'view_count': view_count,
     87             'comment_count': comment_count,
     88             'height': height,
     89             'width': width,
     90         }