youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit ad3bc6acd5d6724875b9fa59f9b5cdb9b904ec91
parent 5afa7f8beefcd9b34035f821ad1cecbcf49a6db8
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Thu, 15 May 2014 12:41:42 +0200

Document and test categories (#2923)

Diffstat:
Myoutube_dl/extractor/common.py | 2++
Myoutube_dl/extractor/youtube.py | 16+++++++++-------
2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py @@ -113,6 +113,8 @@ class InfoExtractor(object): webpage_url: The url to the video webpage, if given to youtube-dl it should allow to get the same result again. (It will be set by YoutubeDL if it's missing) + categories: A list of categories that the video falls in, for example + ["Sports", "Berlin"] Unless mentioned otherwise, the fields should be Unicode strings. diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py @@ -242,7 +242,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u"uploader": u"Philipp Hagemeister", u"uploader_id": u"phihag", u"upload_date": u"20121002", - u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." + u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", + u"categories": [u'Science & Technology'], } }, { @@ -1136,18 +1137,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # upload date upload_date = None - mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL) + mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) upload_date = unified_strdate(upload_date) - - video_categories = [] - # categories m_cat_container = get_element_by_id("eow-category", video_webpage) if m_cat_container: - video_categories = re.findall(r'<a[^<]+>(.*?)</a>', - m_cat_container, re.DOTALL) + category = self._html_search_regex( + r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'cateory', + default=None) + video_categories = None if category is None else [category] + else: + video_categories = None # description video_description = get_element_by_id("eow-description", video_webpage)