toypics.py (2746B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 from .common import InfoExtractor 5 import re 6 7 8 class ToypicsIE(InfoExtractor): 9 IE_DESC = 'Toypics video' 10 _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)' 11 _TEST = { 12 'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/', 13 'md5': '16e806ad6d6f58079d210fe30985e08b', 14 'info_dict': { 15 'id': '514', 16 'ext': 'mp4', 17 'title': "Chance-Bulge'd, 2", 18 'age_limit': 18, 19 'uploader': 'kidsune', 20 } 21 } 22 23 def _real_extract(self, url): 24 video_id = self._match_id(url) 25 26 webpage = self._download_webpage(url, video_id) 27 28 formats = self._parse_html5_media_entries( 29 url, webpage, video_id)[0]['formats'] 30 title = self._html_search_regex([ 31 r'<h1[^>]+class=["\']view-video-title[^>]+>([^<]+)</h', 32 r'<title>([^<]+) - Toypics</title>', 33 ], webpage, 'title') 34 35 uploader = self._html_search_regex( 36 r'More videos from <strong>([^<]+)</strong>', webpage, 'uploader', 37 fatal=False) 38 39 return { 40 'id': video_id, 41 'formats': formats, 42 'title': title, 43 'uploader': uploader, 44 'age_limit': 18, 45 } 46 47 48 class ToypicsUserIE(InfoExtractor): 49 IE_DESC = 'Toypics user profile' 50 _VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P<id>[^/?#&]+)' 51 _TEST = { 52 'url': 'http://videos.toypics.net/Mikey', 53 'info_dict': { 54 'id': 'Mikey', 55 }, 56 'playlist_mincount': 19, 57 } 58 59 def _real_extract(self, url): 60 username = self._match_id(url) 61 62 profile_page = self._download_webpage( 63 url, username, note='Retrieving profile page') 64 65 video_count = int(self._search_regex( 66 r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page, 67 'video count')) 68 69 PAGE_SIZE = 8 70 urls = [] 71 page_count = (video_count + PAGE_SIZE + 1) // PAGE_SIZE 72 for n in range(1, page_count + 1): 73 lpage_url = url + '/public/%d' % n 74 lpage = self._download_webpage( 75 lpage_url, username, 76 note='Downloading page %d/%d' % (n, page_count)) 77 urls.extend( 78 re.findall( 79 r'<div[^>]+class=["\']preview[^>]+>\s*<a[^>]+href="(https?://videos\.toypics\.net/view/[^"]+)"', 80 lpage)) 81 82 return { 83 '_type': 'playlist', 84 'id': username, 85 'entries': [{ 86 '_type': 'url', 87 'url': eurl, 88 'ie_key': 'Toypics', 89 } for eurl in urls] 90 }