From c4a21bc9db1868e8be114f496899f6786b9982ec Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 30 Apr 2015 18:23:35 +0800 Subject: [PATCH] [bilibili] Extract multipart videos (closes #3250) --- youtube_dl/extractor/bilibili.py | 74 +++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 904d9a8b4..7ca835e31 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import itertools from .common import InfoExtractor from ..utils import ( @@ -14,18 +15,25 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/' - _TEST = { + _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '2c301e4dab317596e837c3e7633e7d86', 'info_dict': { - 'id': '1074402', + 'id': '1074402_part1', 'ext': 'flv', 'title': '【金坷垃】金泡沫', 'duration': 308, 'upload_date': '20140420', 'thumbnail': 're:^https?://.+\.jpg', }, - } + }, { + 'url': 'http://www.bilibili.com/video/av1041170/', + 'info_dict': { + 'id': '1041170', + 'title': '【BD1080P】刀语【诸神&异域】', + }, + 'playlist_count': 9, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -57,19 +65,14 @@ class BiliBiliIE(InfoExtractor): cid = self._search_regex(r'cid=(\d+)', webpage, 'cid') + entries = [] + lq_doc = self._download_xml( 'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid, video_id, note='Downloading LQ video info' ) - lq_durl = lq_doc.find('./durl') - formats = [{ - 'format_id': 'lq', - 'quality': 1, - 'url': lq_durl.find('./url').text, - 'filesize': int_or_none( - lq_durl.find('./size'), get_attr='text'), - }] + lq_durls = lq_doc.findall('./durl') hq_doc = self._download_xml( 'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid, @@ -77,23 +80,44 @@ class BiliBiliIE(InfoExtractor): note='Downloading HQ video
info', fatal=False, ) - if hq_doc is not False: - hq_durl = hq_doc.find('./durl') - formats.append({ - 'format_id': 'hq', - 'quality': 2, - 'ext': 'flv', - 'url': hq_durl.find('./url').text, + hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None) + + assert len(lq_durls) == len(hq_durls) + + i = 1 + for lq_durl, hq_durl in zip(lq_durls, hq_durls): + formats = [{ + 'format_id': 'lq', + 'quality': 1, + 'url': lq_durl.find('./url').text, 'filesize': int_or_none( - hq_durl.find('./size'), get_attr='text'), + lq_durl.find('./size'), get_attr='text'), + }] + if hq_durl: + formats.append({ + 'format_id': 'hq', + 'quality': 2, + 'ext': 'flv', + 'url': hq_durl.find('./url').text, + 'filesize': int_or_none( + hq_durl.find('./size'), get_attr='text'), + }) + self._sort_formats(formats) + + entries.append({ + 'id': '%s_part%d' % (video_id, i), + 'title': title, + 'formats': formats, + 'duration': duration, + 'upload_date': upload_date, + 'thumbnail': thumbnail, }) - self._sort_formats(formats) + i += 1 + return { + '_type': 'multi_video', + 'entries': entries, 'id': video_id, - 'title': title, - 'formats': formats, - 'duration': duration, - 'upload_date': upload_date, - 'thumbnail': thumbnail, + 'title': title } -- 2.22.2