youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 32470bf619d31605dc9c51ad107839a097f829f4
parent 8b61bfd6389b62f054cdf9dcb3436395c82a8e28
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Sun, 19 Jul 2015 11:24:19 +0200

[sportschau] Improve title extraction

The HTML '<title>' element ends with '- sportschau.de', which shouldn't be part of the title.

Diffstat:
M youtube_dl/extractor/sportschau.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/sportschau.py b/youtube_dl/extractor/sportschau.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..utils import get_element_by_attribute


 class SportschauIE(InfoExtractor):
@@ -12,7 +13,7 @@ class SportschauIE(InfoExtractor):
         'info_dict': {
             'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
             'ext': 'mp4',
-            'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun" - Tour de France - sportschau.de',
+            'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
             'thumbnail': 're:^https?://.*\.jpg$',
             'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
         },
@@ -34,7 +35,7 @@ class SportschauIE(InfoExtractor):
         m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, ext="mp4")

         webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
+        title = get_element_by_attribute('class', 'headline', webpage)
         desc = self._html_search_meta('description', webpage)

         return {