youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

tube8.py (3087B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from ..utils import (
      6     int_or_none,
      7     str_to_int,
      8 )
      9 from .keezmovies import KeezMoviesIE
     10 
     11 
     12 class Tube8IE(KeezMoviesIE):
     13     _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
     14     _TESTS = [{
     15         'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
     16         'md5': '65e20c48e6abff62ed0c3965fff13a39',
     17         'info_dict': {
     18             'id': '229795',
     19             'display_id': 'kasia-music-video',
     20             'ext': 'mp4',
     21             'description': 'hot teen Kasia grinding',
     22             'uploader': 'unknown',
     23             'title': 'Kasia music video',
     24             'age_limit': 18,
     25             'duration': 230,
     26             'categories': ['Teen'],
     27             'tags': ['dancing'],
     28         },
     29     }, {
     30         'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
     31         'only_matching': True,
     32     }]
     33 
     34     @staticmethod
     35     def _extract_urls(webpage):
     36         return re.findall(
     37             r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
     38             webpage)
     39 
     40     def _real_extract(self, url):
     41         webpage, info = self._extract_info(url)
     42 
     43         if not info['title']:
     44             info['title'] = self._html_search_regex(
     45                 r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
     46 
     47         description = self._html_search_regex(
     48             r'(?s)Description:</dt>\s*<dd>(.+?)</dd>', webpage, 'description', fatal=False)
     49         uploader = self._html_search_regex(
     50             r'<span class="username">\s*(.+?)\s*<',
     51             webpage, 'uploader', fatal=False)
     52 
     53         like_count = int_or_none(self._search_regex(
     54             r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
     55         dislike_count = int_or_none(self._search_regex(
     56             r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
     57         view_count = str_to_int(self._search_regex(
     58             r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
     59             webpage, 'view count', fatal=False))
     60         comment_count = str_to_int(self._search_regex(
     61             r'<span id="allCommentsCount">(\d+)</span>',
     62             webpage, 'comment count', fatal=False))
     63 
     64         category = self._search_regex(
     65             r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
     66             webpage, 'category', fatal=False)
     67         categories = [category] if category else None
     68 
     69         tags_str = self._search_regex(
     70             r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
     71             webpage, 'tags', fatal=False)
     72         tags = [t for t in re.findall(
     73             r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None
     74 
     75         info.update({
     76             'description': description,
     77             'uploader': uploader,
     78             'view_count': view_count,
     79             'like_count': like_count,
     80             'dislike_count': dislike_count,
     81             'comment_count': comment_count,
     82             'categories': categories,
     83             'tags': tags,
     84         })
     85 
     86         return info