photobucket.py (1788B)
1 from __future__ import unicode_literals 2 3 import json 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_urllib_parse_unquote 8 9 10 class PhotobucketIE(InfoExtractor): 11 _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' 12 _TEST = { 13 'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', 14 'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', 15 'info_dict': { 16 'id': 'zpsc0c3b9fa', 17 'ext': 'mp4', 18 'timestamp': 1367669341, 19 'upload_date': '20130504', 20 'uploader': 'rachaneronas', 21 'title': 'Tired of Link Building? Try BacklinkMyDomain.com!', 22 } 23 } 24 25 def _real_extract(self, url): 26 mobj = re.match(self._VALID_URL, url) 27 video_id = mobj.group('id') 28 video_extension = mobj.group('ext') 29 30 webpage = self._download_webpage(url, video_id) 31 32 # Extract URL, uploader, and title from webpage 33 self.report_extraction(video_id) 34 info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', 35 webpage, 'info json') 36 info = json.loads(info_json) 37 url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url')) 38 return { 39 'id': video_id, 40 'url': url, 41 'uploader': info['username'], 42 'timestamp': info['creationDate'], 43 'title': info['title'], 44 'ext': video_extension, 45 'thumbnail': info['thumbUrl'], 46 }