youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 667d96480b4f9c78ceace063415c3424d4d562ea
parent e6fe993c318738fee5a4a2ce7a86c4512e42653a
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sat, 18 Jun 2016 05:42:20 +0700

[pornhd] Detect removed videos and modernize

Diffstat:
M youtube_dl/extractor/pornhd.py | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     int_or_none,
     js_to_json,
 )
@@ -37,17 +37,17 @@ class PornHdIE(InfoExtractor):
         title = self._html_search_regex(
             [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
              r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
-        description = self._html_search_regex(
-            r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
-            webpage, 'description', fatal=False, group='value')
-        view_count = int_or_none(self._html_search_regex(
-            r'(\d+) views\s*<', webpage, 'view count', fatal=False))
-        thumbnail = self._search_regex(
-            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
 
-        sources = json.loads(js_to_json(self._search_regex(
+        sources = self._parse_json(js_to_json(self._search_regex(
             r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
-            webpage, 'sources')))
+            webpage, 'sources', default='{}')), video_id)
+
+        if not sources:
+            message = self._html_search_regex(
+                r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
+                webpage, 'error message', group='value')
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
         formats = []
         for format_id, video_url in sources.items():
             if not video_url:
@@ -61,6 +61,14 @@ class PornHdIE(InfoExtractor):
             })
         self._sort_formats(formats)
 
+        description = self._html_search_regex(
+            r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
+            webpage, 'description', fatal=False, group='value')
+        view_count = int_or_none(self._html_search_regex(
+            r'(\d+) views\s*<', webpage, 'view count', fatal=False))
+        thumbnail = self._search_regex(
+            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+
         return {
             'id': video_id,
             'display_id': display_id,