[openload] remove OpenLoad related extractors(closes #11999)(closes #15406)
authorRemita Amine <remitamine@gmail.com>
Tue, 26 Nov 2019 22:57:37 +0000 (23:57 +0100)
committerRemita Amine <remitamine@gmail.com>
Tue, 26 Nov 2019 22:57:37 +0000 (23:57 +0100)
youtube_dl/extractor/extractors.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/openload.py
youtube_dl/extractor/streamango.py [deleted file]

index cf4bb8f209285a934cb2a70d16edfd9ba12734c5..0e349b778d497f9d6090bdd1ff48d6e238429cba 100644 (file)
@@ -796,10 +796,6 @@ from .ooyala import (
     OoyalaIE,
     OoyalaExternalIE,
 )
-from .openload import (
-    OpenloadIE,
-    VerystreamIE,
-)
 from .ora import OraTVIE
 from .orf import (
     ORFTVthekIE,
@@ -1060,7 +1056,6 @@ from .srmediathek import SRMediathekIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamable import StreamableIE
-from .streamango import StreamangoIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
 from .streetvoice import StreetVoiceIE
index 3d919f656fc821daa7b20f399348c0400e617cc2..743ef47dbe2b6a534a65be67dee28e78aa8fe215 100644 (file)
@@ -88,10 +88,6 @@ from .piksel import PikselIE
 from .videa import VideaIE
 from .twentymin import TwentyMinutenIE
 from .ustream import UstreamIE
-from .openload import (
-    OpenloadIE,
-    VerystreamIE,
-)
 from .videopress import VideoPressIE
 from .rutube import RutubeIE
 from .limelight import LimelightBaseIE
@@ -3048,18 +3044,6 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
 
-        # Look for Openload embeds
-        openload_urls = OpenloadIE._extract_urls(webpage)
-        if openload_urls:
-            return self.playlist_from_matches(
-                openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
-
-        # Look for Verystream embeds
-        verystream_urls = VerystreamIE._extract_urls(webpage)
-        if verystream_urls:
-            return self.playlist_from_matches(
-                verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key())
-
         # Look for VideoPress embeds
         videopress_urls = VideoPressIE._extract_urls(webpage)
         if videopress_urls:
index 66e38cdb4b16d7fe31b320ad7d3cb67a70411af7..0c20d0177e0f2abbc5c0503babf51ba66715f374 100644 (file)
@@ -3,21 +3,17 @@ from __future__ import unicode_literals
 
 import json
 import os
-import re
 import subprocess
 import tempfile
 
-from .common import InfoExtractor
 from ..compat import (
     compat_urlparse,
     compat_kwargs,
 )
 from ..utils import (
     check_executable,
-    determine_ext,
     encodeArgument,
     ExtractorError,
-    get_element_by_id,
     get_exe_version,
     is_outdated_version,
     std_headers,
@@ -240,262 +236,3 @@ class PhantomJSwrapper(object):
         self._load_cookies()
 
         return (html, encodeArgument(out))
-
-
-class OpenloadIE(InfoExtractor):
-    _DOMAINS = r'''
-                    (?:
-                        openload\.(?:co|io|link|pw)|
-                        oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)|
-                        oladblock\.(?:services|xyz|me)|openloed\.co
-                    )
-                '''
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?P<host>
-                            (?:www\.)?
-                            %s
-                        )/
-                        (?:f|embed)/
-                        (?P<id>[a-zA-Z0-9-_]+)
-                    ''' % _DOMAINS
-    _EMBED_WORD = 'embed'
-    _STREAM_WORD = 'f'
-    _REDIR_WORD = 'stream'
-    _URL_IDS = ('streamurl', 'streamuri', 'streamurj')
-    _TESTS = [{
-        'url': 'https://openload.co/f/kUEfGclsU9o',
-        'md5': 'bf1c059b004ebc7a256f89408e65c36e',
-        'info_dict': {
-            'id': 'kUEfGclsU9o',
-            'ext': 'mp4',
-            'title': 'skyrim_no-audio_1080.mp4',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        },
-    }, {
-        'url': 'https://openload.co/embed/rjC09fkPLYs',
-        'info_dict': {
-            'id': 'rjC09fkPLYs',
-            'ext': 'mp4',
-            'title': 'movie.mp4',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'subtitles': {
-                'en': [{
-                    'ext': 'vtt',
-                }],
-            },
-        },
-        'params': {
-            'skip_download': True,  # test subtitles only
-        },
-    }, {
-        'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
-        'only_matching': True,
-    }, {
-        'url': 'https://openload.io/f/ZAn6oz-VZGE/',
-        'only_matching': True,
-    }, {
-        'url': 'https://openload.co/f/_-ztPaZtMhM/',
-        'only_matching': True,
-    }, {
-        # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
-        # for title and ext
-        'url': 'https://openload.co/embed/Sxz5sADo82g/',
-        'only_matching': True,
-    }, {
-        # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
-        # via https://openload.co/f/e-Ixz9ZR5L0/
-        'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.openload.link/f/KnG-kKZdcfY',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.stream/f/KnG-kKZdcfY',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.win/f/kUEfGclsU9o',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.download/f/kUEfGclsU9o',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.cloud/f/4ZDnBXRWiB8',
-        'only_matching': True,
-    }, {
-        # Its title has not got its extension but url has it
-        'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.cc/embed/5NEAbI2BDSk',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.icu/f/-_i4y_F_Hs8',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.fun/f/gb6G1H4sHXY',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.club/f/Nr1L-aZ2dbQ',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.info/f/5NEAbI2BDSk',
-        'only_matching': True,
-    }, {
-        'url': 'https://openload.pw/f/WyKgK8s94N0',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.pw/f/WyKgK8s94N0',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.live/f/-Z58UZ-GR4M',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.space/f/IY4eZSst3u8/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.services/embed/bs1NWj1dCag/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.online/f/W8o2UfN1vNY/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.monster/f/W8o2UfN1vNY/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.press/embed/drTBl1aOTvk/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.website/embed/drTBl1aOTvk/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.life/embed/oOzZjNPw9Dc/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.biz/f/bEk3Gp8ARr4/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.best/embed/kkz9JgVZeWc/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oladblock.me/f/b8NWEgkqNLI/',
-        'only_matching': True,
-    }, {
-        'url': 'https://openloed.co/f/b8NWEgkqNLI/',
-        'only_matching': True,
-    }, {
-        'url': 'https://oload.vip/f/kUEfGclsU9o',
-        'only_matching': True,
-    }]
-
-    @classmethod
-    def _extract_urls(cls, webpage):
-        return re.findall(
-            r'(?x)<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)'
-            % (cls._DOMAINS, cls._EMBED_WORD), webpage)
-
-    def _extract_decrypted_page(self, page_url, webpage, video_id):
-        phantom = PhantomJSwrapper(self, required_version='2.0')
-        webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id)
-        return webpage
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        host = mobj.group('host')
-        video_id = mobj.group('id')
-
-        url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
-
-        for path in (self._EMBED_WORD, self._STREAM_WORD):
-            page_url = url_pattern % path
-            last = path == self._STREAM_WORD
-            webpage = self._download_webpage(
-                page_url, video_id, 'Downloading %s webpage' % path,
-                fatal=last)
-            if not webpage:
-                continue
-            if 'File not found' in webpage or 'deleted by the owner' in webpage:
-                if not last:
-                    continue
-                raise ExtractorError('File not found', expected=True, video_id=video_id)
-            break
-
-        webpage = self._extract_decrypted_page(page_url, webpage, video_id)
-        for element_id in self._URL_IDS:
-            decoded_id = get_element_by_id(element_id, webpage)
-            if decoded_id:
-                break
-        if not decoded_id:
-            decoded_id = self._search_regex(
-                (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
-                 r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
-                 r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
-                 r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
-                 r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
-                'stream URL')
-        video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id)
-
-        title = self._og_search_title(webpage, default=None) or self._search_regex(
-            r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
-            'title', default=None) or self._html_search_meta(
-            'description', webpage, 'title', fatal=True)
-
-        entries = self._parse_html5_media_entries(page_url, webpage, video_id)
-        entry = entries[0] if entries else {}
-        subtitles = entry.get('subtitles')
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
-            'url': video_url,
-            'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
-            'subtitles': subtitles,
-        }
-
-
-class VerystreamIE(OpenloadIE):
-    IE_NAME = 'verystream'
-
-    _DOMAINS = r'(?:verystream\.com|woof\.tube)'
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?P<host>
-                            (?:www\.)?
-                            %s
-                        )/
-                        (?:stream|e)/
-                        (?P<id>[a-zA-Z0-9-_]+)
-                    ''' % _DOMAINS
-    _EMBED_WORD = 'e'
-    _STREAM_WORD = 'stream'
-    _REDIR_WORD = 'gettoken'
-    _URL_IDS = ('videolink', )
-    _TESTS = [{
-        'url': 'https://verystream.com/stream/c1GWQ9ngBBx/',
-        'md5': 'd3e8c5628ccb9970b65fd65269886795',
-        'info_dict': {
-            'id': 'c1GWQ9ngBBx',
-            'ext': 'mp4',
-            'title': 'Big Buck Bunny.mp4',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        },
-    }, {
-        'url': 'https://verystream.com/e/c1GWQ9ngBBx/',
-        'only_matching': True,
-    }]
-
-    def _extract_decrypted_page(self, page_url, webpage, video_id):
-        return webpage  # for Verystream, the webpage is already decrypted
diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py
deleted file mode 100644 (file)
index f1e17dd..0000000
+++ /dev/null
@@ -1,128 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_chr
-from ..utils import (
-    determine_ext,
-    ExtractorError,
-    int_or_none,
-    js_to_json,
-)
-
-
-class StreamangoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
-        'md5': 'e992787515a182f55e38fc97588d802a',
-        'info_dict': {
-            'id': 'clapasobsptpkdfe',
-            'ext': 'mp4',
-            'title': '20170315_150006.mp4',
-        }
-    }, {
-        # no og:title
-        'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
-        'info_dict': {
-            'id': 'foqebrpftarclpob',
-            'ext': 'mp4',
-            'title': 'foqebrpftarclpob',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'gone',
-    }, {
-        'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
-        'only_matching': True,
-    }, {
-        'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4',
-        'only_matching': True,
-    }, {
-        'url': 'https://streamcherry.com/f/clapasobsptpkdfe/',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        def decrypt_src(encoded, val):
-            ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
-            encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
-            decoded = ''
-            sm = [None] * 4
-            i = 0
-            str_len = len(encoded)
-            while i < str_len:
-                for j in range(4):
-                    sm[j % 4] = ALPHABET.index(encoded[i])
-                    i += 1
-                char_code = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val
-                decoded += compat_chr(char_code)
-                if sm[2] != 0x40:
-                    char_code = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2)
-                    decoded += compat_chr(char_code)
-                if sm[3] != 0x40:
-                    char_code = ((sm[2] & 0x3) << 0x6) | sm[3]
-                    decoded += compat_chr(char_code)
-            return decoded
-
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._og_search_title(webpage, default=video_id)
-
-        formats = []
-        for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
-            mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_)
-            if mobj is None:
-                continue
-
-            format_ = format_.replace(mobj.group(0), '')
-
-            video = self._parse_json(
-                format_, video_id, transform_source=js_to_json,
-                fatal=False) or {}
-
-            mobj = re.search(
-                r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)',
-                mobj.group(1))
-            if mobj is None:
-                continue
-
-            src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
-            if not src:
-                continue
-
-            ext = determine_ext(src, default_ext=None)
-            if video.get('type') == 'application/dash+xml' or ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
-                    src, video_id, mpd_id='dash', fatal=False))
-            else:
-                formats.append({
-                    'url': src,
-                    'ext': ext or 'mp4',
-                    'width': int_or_none(video.get('width')),
-                    'height': int_or_none(video.get('height')),
-                    'tbr': int_or_none(video.get('bitrate')),
-                })
-
-        if not formats:
-            error = self._search_regex(
-                r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage,
-                'error', default=None)
-            if not error and '>Sorry' in webpage:
-                error = 'Video %s is not available' % video_id
-            if error:
-                raise ExtractorError(error, expected=True)
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'url': url,
-            'title': title,
-            'formats': formats,
-        }