youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 39baacc49f323adc639d502d38a016ebd63acd75
parent 3a1d48d6de0159807ff57b2cec6766cbfd400f00
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Sat, 21 Sep 2013 12:45:53 +0200

[dailymotion] Add an extractor for users (closes #1476)

Diffstat:
M test/test_playlists.py | 16 +++++++++++++++-
M youtube_dl/extractor/__init__.py | 6 +++++-
M youtube_dl/extractor/dailymotion.py | 44 +++++++++++++++++++++++++++++++++++---------
3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/test/test_playlists.py b/test/test_playlists.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# encoding: utf-8 import sys import unittest @@ -8,7 +9,13 @@ import json import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE +from youtube_dl.extractor import ( + DailymotionPlaylistIE, + DailymotionUserIE, + VimeoChannelIE, + UstreamChannelIE, + SoundcloudUserIE, +) from youtube_dl.utils import * from helper import FakeYDL @@ -25,6 +32,13 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['title'], u'SPORT') self.assertTrue(len(result['entries']) > 20) + def test_dailymotion_user(self): + dl = FakeYDL() + ie = DailymotionUserIE(dl) + result = ie.extract('http://www.dailymotion.com/user/generation-quoi/') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'Génération Quoi') + self.assertTrue(len(result['entries']) >= 26) def test_vimeo_channel(self): dl = FakeYDL() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py @@ -18,7 +18,11 @@ from .comedycentral import ComedyCentralIE from .condenast import CondeNastIE from .criterion import CriterionIE from .cspan import CSpanIE -from .dailymotion import DailymotionIE, DailymotionPlaylistIE +from .dailymotion import ( + DailymotionIE, + DailymotionPlaylistIE, + DailymotionUserIE, +) from .daum import DaumIE from .depositfiles import DepositFilesIE from .dotsub import DotsubIE diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py @@ -114,28 +114,54 @@ class DailymotionIE(SubtitlesInfoExtractor): class DailymotionPlaylistIE(InfoExtractor): + IE_NAME = u'dailymotion:playlist' _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>' + 
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s' - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') + def _extract_entries(self, id): video_ids = [] - for pagenum in itertools.count(1): - webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum), - playlist_id, u'Downloading page %s' % pagenum) + webpage = self._download_webpage(self._PAGE_TEMPLATE % (id, pagenum), + id, u'Downloading page %s' % pagenum) playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el)) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break - - entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') + return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') for video_id in video_ids] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + webpage = self._download_webpage(url, playlist_id) + return {'_type': 'playlist', 'id': playlist_id, 'title': get_element_by_id(u'playlist_name', webpage), - 'entries': entries, + 'entries': self._extract_entries(playlist_id), } + + +class DailymotionUserIE(DailymotionPlaylistIE): + IE_NAME = u'dailymotion:user' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)' + _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>' + _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user = mobj.group('user') + webpage = self._download_webpage(url, user) + full_user = self._html_search_regex( + r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user), + webpage, u'user', flags=re.DOTALL) + + return { + '_type': 'playlist', + 'id': user, + 'title': full_user, + 'entries': 
self._extract_entries(user), + }