Add various anime sites (Closes #4554) - youtube-dl - Another place where youtube-dl lives on

commit b68ff259170711574a1fd779e86bdfea9aaafcf5
parent 19b05d886ef0bd23f4a4c6ee90f171c48cb55ace
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Sun,  4 Jan 2015 02:05:26 +0100

Add various anime sites (Closes #4554)

Diffstat:
M AUTHORS  | 1 +
M youtube_dl/extractor/__init__.py  | 19 +++++++++++++++++++
A youtube_dl/extractor/gogoanime.py  | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A youtube_dl/extractor/play44.py  | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A youtube_dl/extractor/soulanime.py  | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A youtube_dl/extractor/videofun.py  | 36 ++++++++++++++++++++++++++++++++++++

6 files changed, 355 insertions(+), 0 deletions(-)
diff --git a/AUTHORS b/AUTHORS
@@ -98,3 +98,4 @@ Will Glynn
 Max Reimann
 Cédric Luthi
 Thijs Vermeir
+Joel Leclerc
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -164,6 +164,10 @@ from .globo import GloboIE
 from .godtube import GodTubeIE
 from .goldenmoustache import GoldenMoustacheIE
 from .golem import GolemIE
+from .gogoanime import (
+    GoGoAnimeIE,
+    GoGoAnimeSearchIE
+)
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
@@ -313,6 +317,16 @@ from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .planetaplay import PlanetaPlayIE
 from .played import PlayedIE
+from .play44 import (
+    Play44IE,
+    ByZooIE,
+    Video44IE,
+    VideoWingIE,
+    PlayPandaIE,
+    VideoZooIE,
+    PlayBBIE,
+    EasyVideoIE
+)
 from .playfm import PlayFMIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
@@ -373,6 +387,10 @@ from .smotri import (
 from .snotr import SnotrIE
 from .sockshare import SockshareIE
 from .sohu import SohuIE
+from .soulanime import (
+    SoulAnimeWatchingIE,
+    SoulAnimeSeriesIE
+)
 from .soundcloud import (
     SoundcloudIE,
     SoundcloudSetIE,
@@ -467,6 +485,7 @@ from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
 from .videolecturesnet import VideoLecturesNetIE
+from .videofun import VideoFunIE
 from .videofyme import VideofyMeIE
 from .videomega import VideoMegaIE
 from .videopremium import VideoPremiumIE
diff --git a/youtube_dl/extractor/gogoanime.py b/youtube_dl/extractor/gogoanime.py
@@ -0,0 +1,76 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_parse,
+    get_element_by_attribute,
+    unescapeHTML
+)
+
+
+class GoGoAnimeIE(InfoExtractor):
+    """Extractor for a single GoGoAnime post page.
+
+    Collects the ``<iframe>`` player URLs embedded in the post and hands them
+    on as URL results: as a playlist when the post content starts directly
+    with a player iframe, otherwise as one ``url`` result for the first embed.
+    """
+
+    IE_NAME = 'gogoanime'
+    IE_DESC = 'GoGoAnime'
+    # Dots are escaped so that only the literal host name is accepted.
+    _VALID_URL = r'http://www\.gogoanime\.com/(?P<id>[A-Za-z0-9-]+)'
+
+    _TEST = {
+        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
+        'info_dict': {
+            'id': 'mahou-shoujo-madoka-magica-movie-1'
+        },
+        'playlist_count': 3
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist or a single ``url`` result for the post at *url*.
+
+        Raises ExtractorError when the site reports a missing page, or when
+        the single-video layout contains no usable embedded player URL.
+        """
+        video_id = self._match_id(url)
+        page = self._download_webpage(url, video_id)
+
+        if 'Oops! Page Not Found</font>' in page:
+            raise ExtractorError('Video does not exist', expected=True)
+
+        # Fall back to an empty string so that a page without a "postcontent"
+        # element ends in a readable ExtractorError below instead of a
+        # TypeError from re.findall (assumes the helper returns None when
+        # nothing matches).
+        content = get_element_by_attribute("class", "postcontent", page) or ''
+        embeds = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
+        # Embeds whose raw URL mentions videofun are skipped; the rest are
+        # percent-decoded and HTML-unescaped.
+        vids = [
+            unescapeHTML(compat_urllib_parse.unquote(x))
+            for x in embeds if 'videofun' not in x]
+
+        # A post whose content opens with an iframe is treated as a playlist.
+        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
+            return self.playlist_result([self.url_result(vid) for vid in vids], video_id)
+
+        title = self._html_search_regex(
+            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')
+
+        # Previously an empty list crashed with a bare IndexError.
+        if not vids:
+            raise ExtractorError('Unable to find any embedded video')
+
+        return {
+            '_type': 'url',
+            'id': video_id,
+            'url': vids[0],
+            'title': title,
+        }
+
+
+class GoGoAnimeSearchIE(InfoExtractor):
+    """Turn a GoGoAnime search result page into a playlist of post URLs."""
+
+    IE_NAME = 'gogoanime:search'
+    IE_DESC = 'GoGoAnime Search'
+
+    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'
+    _TEST = {
+        'url': 'http://www.gogoanime.com/?s=bokusatsu',
+        'info_dict': {
+            'id': 'bokusatsu'
+        },
+        'playlist_count': 6
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist whose id is the search term taken from *url*."""
+        query = self._match_id(url)
+        results_page = self._download_webpage(url, query)
+
+        # Each "postlist" block links to one post; collect the link targets
+        # in page order.
+        entries = []
+        for post_url in re.findall(
+                r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"',
+                results_page):
+            entries.append(self.url_result(post_url))
+
+        return self.playlist_result(entries, query)
diff --git a/youtube_dl/extractor/play44.py b/youtube_dl/extractor/play44.py
@@ -0,0 +1,149 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse
+)
+
+
+class Play44IE(InfoExtractor):
+    """Extractor for play44.net embed pages.
+
+    The other extractors in this module subclass it and replace only
+    ``_VALID_URL`` and ``_TESTS``; they all share ``_real_extract``.
+    """
+
+    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)'
+
+    _TESTS = [{
+        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv',
+        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
+        'info_dict': {
+            'id': 'mahou-shoujo-madoka-magica-07',
+            'ext': 'flv',
+            'title': 'mahou-shoujo-madoka-magica-07',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Return id, url and title for the media file behind *url*.
+
+        The id matched from *url* is only used while downloading the embed
+        page; the returned id and title are both the media file's base name.
+        """
+        video_id = self._match_id(url)
+        page = self._download_webpage(url, video_id)
+
+        # The page assigns the percent-encoded media URL to a `_url` variable.
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'_url = "(https?://[^"]+?)";', page, 'url'))
+        # Base name = text between the last "/" and the following literal dot.
+        # The dot is escaped so that it cannot match an arbitrary character.
+        title = self._search_regex(r'.*/(?P<title>[^.]*)\.', video_url, 'title')
+
+        return {
+            'id': title,
+            'url': video_url,
+            'title': title,
+        }
+
+
+class ByZooIE(Play44IE):
+    """byzoo.org embed pages; extraction is inherited from Play44IE."""
+
+    # Same URL shape as Play44IE: embed.php followed by a path in the query.
+    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)'
+
+    _TESTS = [{
+        'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4',
+        'md5': '455c83dabe2cd9fd74a87612b01fe017',
+        'info_dict': {
+            'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
+            'ext': 'mp4',
+            'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
+        }
+    }]
+
+
+class Video44IE(Play44IE):
+    """video44.net player pages; extraction is inherited from Play44IE."""
+
+    # The id is the whole value of the `file` query parameter. The previous
+    # group, `[^&].`, captured only its first two characters.
+    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&]+)'
+
+    _TESTS = [{
+        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1',
+        'md5': '43eaec6d0beb10e8d42459b9f108aff3',
+        'info_dict': {
+            'id': 'chaoshead-12',
+            'ext': 'mp4',
+            'title': 'chaoshead-12',
+        }
+    }]
+
+
+class VideoWingIE(Play44IE):
+    """videowing embed pages; extraction is inherited from Play44IE."""
+
+    # Two URL forms are accepted on any videowing.* host: a `video=` query
+    # parameter holding a file path, or an `/embed/<token>` path. The id
+    # stops at the first `&`, `?` or `.`.
+    _VALID_URL = r'''(?x)
+        http://[w.]*videowing\.[^/]*/
+        (?:
+            .*video=/*
+            |embed/
+        )
+        (?P<id>[^&?.]+)
+    '''
+
+    # One test per accepted URL form.
+    _TESTS = [{
+        'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }, {
+        'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438',
+        'md5': '33fdd71581357018c226f95c5cedcfd7',
+        'info_dict': {
+            'id': 'mahoushoujomadokamagicamovie1part1',
+            'ext': 'flv',
+            'title': 'mahoushoujomadokamagicamovie1part1',
+        }
+    }]
+
+
+class PlayPandaIE(Play44IE):
+    """playpanda embed pages; extraction is inherited from Play44IE."""
+
+    # The id is the whole value of the `vid` query parameter (leading slashes
+    # dropped). The previous group, `[^&].`, captured only two characters.
+    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&]+)'
+
+    # No 'description' is expected: the inherited _real_extract returns only
+    # id, url and title.
+    _TESTS = [{
+        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }]
+
+
+class VideoZooIE(Play44IE):
+    """videozoo embed pages; extraction is inherited from Play44IE."""
+
+    # The id is the whole value of the `vid` query parameter (leading slashes
+    # dropped). The previous group, `[^&].`, captured only two characters.
+    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&]+)'
+
+    _TESTS = [{
+        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }]
+
+
+class PlayBBIE(Play44IE):
+    """playbb embed pages; extraction is inherited from Play44IE."""
+
+    # The id is the whole value of the `vid` query parameter (leading slashes
+    # dropped). The previous group, `[^&].`, captured only two characters.
+    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&]+)'
+
+    _TESTS = [{
+        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }]
+
+
+class EasyVideoIE(Play44IE):
+    """easyvideo player pages; extraction is inherited from Play44IE."""
+
+    # The id is the `file` query parameter up to the first `&` or `.`.
+    _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)'
+
+    _TESTS = [{
+        'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1',
+        'md5': '26178b57629b7650106d72b191137176',
+        'info_dict': {
+            'id': 'bokuwatomodachigasukunai-04',
+            'ext': 'mp4',
+            'title': 'bokuwatomodachigasukunai-04',
+        },
+        # The test is marked as skipped with this reason.
+        'skip': 'Blocked in Germany',
+    }]
diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py
@@ -0,0 +1,74 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SoulAnimeWatchingIE(InfoExtractor):
+    """Extractor for a single soul-anime episode ("watching") page."""
+
+    IE_NAME = "soulanime:watching"
+    IE_DESC = "SoulAnime video"
+    _TEST = {
+        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
+        'md5': '05fae04abf72298098b528e98abf4298',
+        'info_dict': {
+            'id': 'seirei-tsukai-no-blade-dance-episode-9',
+            'ext': 'mp4',
+            'title': 'seirei-tsukai-no-blade-dance-episode-9',
+            'description': 'seirei-tsukai-no-blade-dance-episode-9'
+        }
+    }
+    # `domain` captures the top-level domain so the same one can be reused
+    # when the absolute download URL is built.
+    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
+
+    def _real_extract(self, url):
+        """Return the info dict for the episode at *url*.
+
+        The URL slug serves as id, title and description. ``ext`` is taken
+        from the Content-Type of the download link and is left out when that
+        header carries no usable ``type/subtype`` value.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        domain = mobj.group('domain')
+
+        page = self._download_webpage(url, video_id)
+
+        # The download link on the page is site-relative.
+        video_url_encoded = self._html_search_regex(
+            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
+        video_url = "http://www.soul-anime." + domain + video_url_encoded
+
+        vid = self._request_webpage(video_url, video_id)
+        # `info().gettype()` exists only on Python 2. Reading the header with
+        # `get` works on the message objects of both Python 2 and Python 3.
+        content_type = vid.info().get('Content-Type') or ''
+        # Drop parameters such as "; charset=..." before splitting.
+        mime_type = content_type.split(';')[0].strip().lower()
+
+        info = {
+            'id': video_id,
+            'url': video_url,
+            'title': video_id,
+            'description': video_id
+        }
+        if '/' in mime_type:
+            info['ext'] = mime_type.split('/')[1]
+        return info
+
+
+class SoulAnimeSeriesIE(InfoExtractor):
+    """Turn a soul-anime series page into a playlist of its episode pages."""
+
+    IE_NAME = "soulanime:series"
+    IE_DESC = "SoulAnime Series"
+
+    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
+
+    # Episodes are listed as <option> values holding site-relative paths.
+    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
+
+    _TEST = {
+        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
+        'info_dict': {
+            'id': 'black-rock-shooter-tv'
+        },
+        'playlist_count': 8
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist, keyed by the series slug, of all episode URLs."""
+        url_match = re.match(self._VALID_URL, url)
+        series_id = url_match.group('id')
+        site_root = "http://www.soul-anime." + url_match.group('domain')
+
+        page = self._download_webpage(url, series_id, "Downloading series page")
+
+        # Prefix every relative episode path with the site root of the
+        # requested domain.
+        entries = []
+        for episode_path in re.findall(self._EPISODE_REGEX, page):
+            entries.append(self.url_result(site_root + episode_path))
+
+        return self.playlist_result(entries, series_id)
diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py
@@ -0,0 +1,36 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse
+)
+
+
+class VideoFunIE(InfoExtractor):
+    """Extractor for videofun.me embed pages."""
+
+    _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)'
+
+    _TEST = {
+        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438',
+        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
+        'info_dict': {
+            'id': 'Mahou-Shoujo-Madoka-Magica-07',
+            'ext': 'flv',
+            'title': 'Mahou-Shoujo-Madoka-Magica-07',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return id, url and title for the media file behind *url*.
+
+        The hex token from *url* is only used while downloading the embed
+        page; the returned id and title are the media file's base name.
+        """
+        embed_id = self._match_id(url)
+        embed_page = self._download_webpage(
+            url, embed_id, 'Downloading video page')
+
+        # The page carries the percent-encoded gateway URL; decode it.
+        media_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'url: "(http://gateway\.videofun\.me[^"]+)"', embed_page, 'video url'))
+        # Base name = text between the last "/" and the following dot.
+        name = self._html_search_regex(r'.*/([^.]*)\.', media_url, 'title')
+
+        return {
+            'title': name,
+            'url': media_url,
+            'id': name,
+        }

	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log \| Files \| Refs \| README \| LICENSE

M	AUTHORS	\|	1	+
M	youtube_dl/extractor/__init__.py	\|	19	+++++++++++++++++++
A	youtube_dl/extractor/gogoanime.py	\|	76	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	youtube_dl/extractor/play44.py	\|	149	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	youtube_dl/extractor/soulanime.py	\|	74	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	youtube_dl/extractor/videofun.py	\|	36	++++++++++++++++++++++++++++++++++++