youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git

utils.py (170851B)


      1 #!/usr/bin/env python
      2 # coding: utf-8
      3 
      4 from __future__ import unicode_literals
      5 
      6 import base64
      7 import binascii
      8 import calendar
      9 import codecs
     10 import collections
     11 import contextlib
     12 import ctypes
     13 import datetime
     14 import email.utils
     15 import email.header
     16 import errno
     17 import functools
     18 import gzip
     19 import io
     20 import itertools
     21 import json
     22 import locale
     23 import math
     24 import operator
     25 import os
     26 import platform
     27 import random
     28 import re
     29 import socket
     30 import ssl
     31 import subprocess
     32 import sys
     33 import tempfile
     34 import time
     35 import traceback
     36 import xml.etree.ElementTree
     37 import zlib
     38 
     39 from .compat import (
     40     compat_HTMLParseError,
     41     compat_HTMLParser,
     42     compat_HTTPError,
     43     compat_basestring,
     44     compat_chr,
     45     compat_cookiejar,
     46     compat_ctypes_WINFUNCTYPE,
     47     compat_etree_fromstring,
     48     compat_expanduser,
     49     compat_html_entities,
     50     compat_html_entities_html5,
     51     compat_http_client,
     52     compat_integer_types,
     53     compat_kwargs,
     54     compat_os_name,
     55     compat_parse_qs,
     56     compat_shlex_quote,
     57     compat_str,
     58     compat_struct_pack,
     59     compat_struct_unpack,
     60     compat_urllib_error,
     61     compat_urllib_parse,
     62     compat_urllib_parse_urlencode,
     63     compat_urllib_parse_urlparse,
     64     compat_urllib_parse_unquote_plus,
     65     compat_urllib_request,
     66     compat_urlparse,
     67     compat_xpath,
     68 )
     69 
     70 from .socks import (
     71     ProxyType,
     72     sockssocket,
     73 )
     74 
     75 
     76 def register_socks_protocols():
     77     # "Register" SOCKS protocols
     78     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
     79     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
     80     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
     81         if scheme not in compat_urlparse.uses_netloc:
     82             compat_urlparse.uses_netloc.append(scheme)
     83 
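# --- Editor's illustrative sketch (not part of the original utils.py) ---
# Once the SOCKS schemes are registered, urlsplit()/urlparse() resolve the
# host and port of proxy URLs such as socks5://127.0.0.1:1080 instead of
# treating everything after the scheme as a path. The helper below is
# hypothetical and for demonstration only:
def _demo_socks_scheme_parsing():
    register_socks_protocols()
    parts = compat_urlparse.urlsplit('socks5://127.0.0.1:1080')
    return parts.hostname, parts.port  # ('127.0.0.1', 1080)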
     84 
     85 # This is not clearly defined otherwise
     86 compiled_regex_type = type(re.compile(''))
     87 
     88 
     89 def random_user_agent():
     90     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
     91     _CHROME_VERSIONS = (
     92         '74.0.3729.129',
     93         '76.0.3780.3',
     94         '76.0.3780.2',
     95         '74.0.3729.128',
     96         '76.0.3780.1',
     97         '76.0.3780.0',
     98         '75.0.3770.15',
     99         '74.0.3729.127',
    100         '74.0.3729.126',
    101         '76.0.3779.1',
    102         '76.0.3779.0',
    103         '75.0.3770.14',
    104         '74.0.3729.125',
    105         '76.0.3778.1',
    106         '76.0.3778.0',
    107         '75.0.3770.13',
    108         '74.0.3729.124',
    109         '74.0.3729.123',
    110         '73.0.3683.121',
    111         '76.0.3777.1',
    112         '76.0.3777.0',
    113         '75.0.3770.12',
    114         '74.0.3729.122',
    115         '76.0.3776.4',
    116         '75.0.3770.11',
    117         '74.0.3729.121',
    118         '76.0.3776.3',
    119         '76.0.3776.2',
    120         '73.0.3683.120',
    121         '74.0.3729.120',
    122         '74.0.3729.119',
    123         '74.0.3729.118',
    124         '76.0.3776.1',
    125         '76.0.3776.0',
    126         '76.0.3775.5',
    127         '75.0.3770.10',
    128         '74.0.3729.117',
    129         '76.0.3775.4',
    130         '76.0.3775.3',
    131         '74.0.3729.116',
    132         '75.0.3770.9',
    133         '76.0.3775.2',
    134         '76.0.3775.1',
    135         '76.0.3775.0',
    136         '75.0.3770.8',
    137         '74.0.3729.115',
    138         '74.0.3729.114',
    139         '76.0.3774.1',
    140         '76.0.3774.0',
    141         '75.0.3770.7',
    142         '74.0.3729.113',
    143         '74.0.3729.112',
    144         '74.0.3729.111',
    145         '76.0.3773.1',
    146         '76.0.3773.0',
    147         '75.0.3770.6',
    148         '74.0.3729.110',
    149         '74.0.3729.109',
    150         '76.0.3772.1',
    151         '76.0.3772.0',
    152         '75.0.3770.5',
    153         '74.0.3729.108',
    154         '74.0.3729.107',
    155         '76.0.3771.1',
    156         '76.0.3771.0',
    157         '75.0.3770.4',
    158         '74.0.3729.106',
    159         '74.0.3729.105',
    160         '75.0.3770.3',
    161         '74.0.3729.104',
    162         '74.0.3729.103',
    163         '74.0.3729.102',
    164         '75.0.3770.2',
    165         '74.0.3729.101',
    166         '75.0.3770.1',
    167         '75.0.3770.0',
    168         '74.0.3729.100',
    169         '75.0.3769.5',
    170         '75.0.3769.4',
    171         '74.0.3729.99',
    172         '75.0.3769.3',
    173         '75.0.3769.2',
    174         '75.0.3768.6',
    175         '74.0.3729.98',
    176         '75.0.3769.1',
    177         '75.0.3769.0',
    178         '74.0.3729.97',
    179         '73.0.3683.119',
    180         '73.0.3683.118',
    181         '74.0.3729.96',
    182         '75.0.3768.5',
    183         '75.0.3768.4',
    184         '75.0.3768.3',
    185         '75.0.3768.2',
    186         '74.0.3729.95',
    187         '74.0.3729.94',
    188         '75.0.3768.1',
    189         '75.0.3768.0',
    190         '74.0.3729.93',
    191         '74.0.3729.92',
    192         '73.0.3683.117',
    193         '74.0.3729.91',
    194         '75.0.3766.3',
    195         '74.0.3729.90',
    196         '75.0.3767.2',
    197         '75.0.3767.1',
    198         '75.0.3767.0',
    199         '74.0.3729.89',
    200         '73.0.3683.116',
    201         '75.0.3766.2',
    202         '74.0.3729.88',
    203         '75.0.3766.1',
    204         '75.0.3766.0',
    205         '74.0.3729.87',
    206         '73.0.3683.115',
    207         '74.0.3729.86',
    208         '75.0.3765.1',
    209         '75.0.3765.0',
    210         '74.0.3729.85',
    211         '73.0.3683.114',
    212         '74.0.3729.84',
    213         '75.0.3764.1',
    214         '75.0.3764.0',
    215         '74.0.3729.83',
    216         '73.0.3683.113',
    217         '75.0.3763.2',
    218         '75.0.3761.4',
    219         '74.0.3729.82',
    220         '75.0.3763.1',
    221         '75.0.3763.0',
    222         '74.0.3729.81',
    223         '73.0.3683.112',
    224         '75.0.3762.1',
    225         '75.0.3762.0',
    226         '74.0.3729.80',
    227         '75.0.3761.3',
    228         '74.0.3729.79',
    229         '73.0.3683.111',
    230         '75.0.3761.2',
    231         '74.0.3729.78',
    232         '74.0.3729.77',
    233         '75.0.3761.1',
    234         '75.0.3761.0',
    235         '73.0.3683.110',
    236         '74.0.3729.76',
    237         '74.0.3729.75',
    238         '75.0.3760.0',
    239         '74.0.3729.74',
    240         '75.0.3759.8',
    241         '75.0.3759.7',
    242         '75.0.3759.6',
    243         '74.0.3729.73',
    244         '75.0.3759.5',
    245         '74.0.3729.72',
    246         '73.0.3683.109',
    247         '75.0.3759.4',
    248         '75.0.3759.3',
    249         '74.0.3729.71',
    250         '75.0.3759.2',
    251         '74.0.3729.70',
    252         '73.0.3683.108',
    253         '74.0.3729.69',
    254         '75.0.3759.1',
    255         '75.0.3759.0',
    256         '74.0.3729.68',
    257         '73.0.3683.107',
    258         '74.0.3729.67',
    259         '75.0.3758.1',
    260         '75.0.3758.0',
    261         '74.0.3729.66',
    262         '73.0.3683.106',
    263         '74.0.3729.65',
    264         '75.0.3757.1',
    265         '75.0.3757.0',
    266         '74.0.3729.64',
    267         '73.0.3683.105',
    268         '74.0.3729.63',
    269         '75.0.3756.1',
    270         '75.0.3756.0',
    271         '74.0.3729.62',
    272         '73.0.3683.104',
    273         '75.0.3755.3',
    274         '75.0.3755.2',
    275         '73.0.3683.103',
    276         '75.0.3755.1',
    277         '75.0.3755.0',
    278         '74.0.3729.61',
    279         '73.0.3683.102',
    280         '74.0.3729.60',
    281         '75.0.3754.2',
    282         '74.0.3729.59',
    283         '75.0.3753.4',
    284         '74.0.3729.58',
    285         '75.0.3754.1',
    286         '75.0.3754.0',
    287         '74.0.3729.57',
    288         '73.0.3683.101',
    289         '75.0.3753.3',
    290         '75.0.3752.2',
    291         '75.0.3753.2',
    292         '74.0.3729.56',
    293         '75.0.3753.1',
    294         '75.0.3753.0',
    295         '74.0.3729.55',
    296         '73.0.3683.100',
    297         '74.0.3729.54',
    298         '75.0.3752.1',
    299         '75.0.3752.0',
    300         '74.0.3729.53',
    301         '73.0.3683.99',
    302         '74.0.3729.52',
    303         '75.0.3751.1',
    304         '75.0.3751.0',
    305         '74.0.3729.51',
    306         '73.0.3683.98',
    307         '74.0.3729.50',
    308         '75.0.3750.0',
    309         '74.0.3729.49',
    310         '74.0.3729.48',
    311         '74.0.3729.47',
    312         '75.0.3749.3',
    313         '74.0.3729.46',
    314         '73.0.3683.97',
    315         '75.0.3749.2',
    316         '74.0.3729.45',
    317         '75.0.3749.1',
    318         '75.0.3749.0',
    319         '74.0.3729.44',
    320         '73.0.3683.96',
    321         '74.0.3729.43',
    322         '74.0.3729.42',
    323         '75.0.3748.1',
    324         '75.0.3748.0',
    325         '74.0.3729.41',
    326         '75.0.3747.1',
    327         '73.0.3683.95',
    328         '75.0.3746.4',
    329         '74.0.3729.40',
    330         '74.0.3729.39',
    331         '75.0.3747.0',
    332         '75.0.3746.3',
    333         '75.0.3746.2',
    334         '74.0.3729.38',
    335         '75.0.3746.1',
    336         '75.0.3746.0',
    337         '74.0.3729.37',
    338         '73.0.3683.94',
    339         '75.0.3745.5',
    340         '75.0.3745.4',
    341         '75.0.3745.3',
    342         '75.0.3745.2',
    343         '74.0.3729.36',
    344         '75.0.3745.1',
    345         '75.0.3745.0',
    346         '75.0.3744.2',
    347         '74.0.3729.35',
    348         '73.0.3683.93',
    349         '74.0.3729.34',
    350         '75.0.3744.1',
    351         '75.0.3744.0',
    352         '74.0.3729.33',
    353         '73.0.3683.92',
    354         '74.0.3729.32',
    355         '74.0.3729.31',
    356         '73.0.3683.91',
    357         '75.0.3741.2',
    358         '75.0.3740.5',
    359         '74.0.3729.30',
    360         '75.0.3741.1',
    361         '75.0.3741.0',
    362         '74.0.3729.29',
    363         '75.0.3740.4',
    364         '73.0.3683.90',
    365         '74.0.3729.28',
    366         '75.0.3740.3',
    367         '73.0.3683.89',
    368         '75.0.3740.2',
    369         '74.0.3729.27',
    370         '75.0.3740.1',
    371         '75.0.3740.0',
    372         '74.0.3729.26',
    373         '73.0.3683.88',
    374         '73.0.3683.87',
    375         '74.0.3729.25',
    376         '75.0.3739.1',
    377         '75.0.3739.0',
    378         '73.0.3683.86',
    379         '74.0.3729.24',
    380         '73.0.3683.85',
    381         '75.0.3738.4',
    382         '75.0.3738.3',
    383         '75.0.3738.2',
    384         '75.0.3738.1',
    385         '75.0.3738.0',
    386         '74.0.3729.23',
    387         '73.0.3683.84',
    388         '74.0.3729.22',
    389         '74.0.3729.21',
    390         '75.0.3737.1',
    391         '75.0.3737.0',
    392         '74.0.3729.20',
    393         '73.0.3683.83',
    394         '74.0.3729.19',
    395         '75.0.3736.1',
    396         '75.0.3736.0',
    397         '74.0.3729.18',
    398         '73.0.3683.82',
    399         '74.0.3729.17',
    400         '75.0.3735.1',
    401         '75.0.3735.0',
    402         '74.0.3729.16',
    403         '73.0.3683.81',
    404         '75.0.3734.1',
    405         '75.0.3734.0',
    406         '74.0.3729.15',
    407         '73.0.3683.80',
    408         '74.0.3729.14',
    409         '75.0.3733.1',
    410         '75.0.3733.0',
    411         '75.0.3732.1',
    412         '74.0.3729.13',
    413         '74.0.3729.12',
    414         '73.0.3683.79',
    415         '74.0.3729.11',
    416         '75.0.3732.0',
    417         '74.0.3729.10',
    418         '73.0.3683.78',
    419         '74.0.3729.9',
    420         '74.0.3729.8',
    421         '74.0.3729.7',
    422         '75.0.3731.3',
    423         '75.0.3731.2',
    424         '75.0.3731.0',
    425         '74.0.3729.6',
    426         '73.0.3683.77',
    427         '73.0.3683.76',
    428         '75.0.3730.5',
    429         '75.0.3730.4',
    430         '73.0.3683.75',
    431         '74.0.3729.5',
    432         '73.0.3683.74',
    433         '75.0.3730.3',
    434         '75.0.3730.2',
    435         '74.0.3729.4',
    436         '73.0.3683.73',
    437         '73.0.3683.72',
    438         '75.0.3730.1',
    439         '75.0.3730.0',
    440         '74.0.3729.3',
    441         '73.0.3683.71',
    442         '74.0.3729.2',
    443         '73.0.3683.70',
    444         '74.0.3729.1',
    445         '74.0.3729.0',
    446         '74.0.3726.4',
    447         '73.0.3683.69',
    448         '74.0.3726.3',
    449         '74.0.3728.0',
    450         '74.0.3726.2',
    451         '73.0.3683.68',
    452         '74.0.3726.1',
    453         '74.0.3726.0',
    454         '74.0.3725.4',
    455         '73.0.3683.67',
    456         '73.0.3683.66',
    457         '74.0.3725.3',
    458         '74.0.3725.2',
    459         '74.0.3725.1',
    460         '74.0.3724.8',
    461         '74.0.3725.0',
    462         '73.0.3683.65',
    463         '74.0.3724.7',
    464         '74.0.3724.6',
    465         '74.0.3724.5',
    466         '74.0.3724.4',
    467         '74.0.3724.3',
    468         '74.0.3724.2',
    469         '74.0.3724.1',
    470         '74.0.3724.0',
    471         '73.0.3683.64',
    472         '74.0.3723.1',
    473         '74.0.3723.0',
    474         '73.0.3683.63',
    475         '74.0.3722.1',
    476         '74.0.3722.0',
    477         '73.0.3683.62',
    478         '74.0.3718.9',
    479         '74.0.3702.3',
    480         '74.0.3721.3',
    481         '74.0.3721.2',
    482         '74.0.3721.1',
    483         '74.0.3721.0',
    484         '74.0.3720.6',
    485         '73.0.3683.61',
    486         '72.0.3626.122',
    487         '73.0.3683.60',
    488         '74.0.3720.5',
    489         '72.0.3626.121',
    490         '74.0.3718.8',
    491         '74.0.3720.4',
    492         '74.0.3720.3',
    493         '74.0.3718.7',
    494         '74.0.3720.2',
    495         '74.0.3720.1',
    496         '74.0.3720.0',
    497         '74.0.3718.6',
    498         '74.0.3719.5',
    499         '73.0.3683.59',
    500         '74.0.3718.5',
    501         '74.0.3718.4',
    502         '74.0.3719.4',
    503         '74.0.3719.3',
    504         '74.0.3719.2',
    505         '74.0.3719.1',
    506         '73.0.3683.58',
    507         '74.0.3719.0',
    508         '73.0.3683.57',
    509         '73.0.3683.56',
    510         '74.0.3718.3',
    511         '73.0.3683.55',
    512         '74.0.3718.2',
    513         '74.0.3718.1',
    514         '74.0.3718.0',
    515         '73.0.3683.54',
    516         '74.0.3717.2',
    517         '73.0.3683.53',
    518         '74.0.3717.1',
    519         '74.0.3717.0',
    520         '73.0.3683.52',
    521         '74.0.3716.1',
    522         '74.0.3716.0',
    523         '73.0.3683.51',
    524         '74.0.3715.1',
    525         '74.0.3715.0',
    526         '73.0.3683.50',
    527         '74.0.3711.2',
    528         '74.0.3714.2',
    529         '74.0.3713.3',
    530         '74.0.3714.1',
    531         '74.0.3714.0',
    532         '73.0.3683.49',
    533         '74.0.3713.1',
    534         '74.0.3713.0',
    535         '72.0.3626.120',
    536         '73.0.3683.48',
    537         '74.0.3712.2',
    538         '74.0.3712.1',
    539         '74.0.3712.0',
    540         '73.0.3683.47',
    541         '72.0.3626.119',
    542         '73.0.3683.46',
    543         '74.0.3710.2',
    544         '72.0.3626.118',
    545         '74.0.3711.1',
    546         '74.0.3711.0',
    547         '73.0.3683.45',
    548         '72.0.3626.117',
    549         '74.0.3710.1',
    550         '74.0.3710.0',
    551         '73.0.3683.44',
    552         '72.0.3626.116',
    553         '74.0.3709.1',
    554         '74.0.3709.0',
    555         '74.0.3704.9',
    556         '73.0.3683.43',
    557         '72.0.3626.115',
    558         '74.0.3704.8',
    559         '74.0.3704.7',
    560         '74.0.3708.0',
    561         '74.0.3706.7',
    562         '74.0.3704.6',
    563         '73.0.3683.42',
    564         '72.0.3626.114',
    565         '74.0.3706.6',
    566         '72.0.3626.113',
    567         '74.0.3704.5',
    568         '74.0.3706.5',
    569         '74.0.3706.4',
    570         '74.0.3706.3',
    571         '74.0.3706.2',
    572         '74.0.3706.1',
    573         '74.0.3706.0',
    574         '73.0.3683.41',
    575         '72.0.3626.112',
    576         '74.0.3705.1',
    577         '74.0.3705.0',
    578         '73.0.3683.40',
    579         '72.0.3626.111',
    580         '73.0.3683.39',
    581         '74.0.3704.4',
    582         '73.0.3683.38',
    583         '74.0.3704.3',
    584         '74.0.3704.2',
    585         '74.0.3704.1',
    586         '74.0.3704.0',
    587         '73.0.3683.37',
    588         '72.0.3626.110',
    589         '72.0.3626.109',
    590         '74.0.3703.3',
    591         '74.0.3703.2',
    592         '73.0.3683.36',
    593         '74.0.3703.1',
    594         '74.0.3703.0',
    595         '73.0.3683.35',
    596         '72.0.3626.108',
    597         '74.0.3702.2',
    598         '74.0.3699.3',
    599         '74.0.3702.1',
    600         '74.0.3702.0',
    601         '73.0.3683.34',
    602         '72.0.3626.107',
    603         '73.0.3683.33',
    604         '74.0.3701.1',
    605         '74.0.3701.0',
    606         '73.0.3683.32',
    607         '73.0.3683.31',
    608         '72.0.3626.105',
    609         '74.0.3700.1',
    610         '74.0.3700.0',
    611         '73.0.3683.29',
    612         '72.0.3626.103',
    613         '74.0.3699.2',
    614         '74.0.3699.1',
    615         '74.0.3699.0',
    616         '73.0.3683.28',
    617         '72.0.3626.102',
    618         '73.0.3683.27',
    619         '73.0.3683.26',
    620         '74.0.3698.0',
    621         '74.0.3696.2',
    622         '72.0.3626.101',
    623         '73.0.3683.25',
    624         '74.0.3696.1',
    625         '74.0.3696.0',
    626         '74.0.3694.8',
    627         '72.0.3626.100',
    628         '74.0.3694.7',
    629         '74.0.3694.6',
    630         '74.0.3694.5',
    631         '74.0.3694.4',
    632         '72.0.3626.99',
    633         '72.0.3626.98',
    634         '74.0.3694.3',
    635         '73.0.3683.24',
    636         '72.0.3626.97',
    637         '72.0.3626.96',
    638         '72.0.3626.95',
    639         '73.0.3683.23',
    640         '72.0.3626.94',
    641         '73.0.3683.22',
    642         '73.0.3683.21',
    643         '72.0.3626.93',
    644         '74.0.3694.2',
    645         '72.0.3626.92',
    646         '74.0.3694.1',
    647         '74.0.3694.0',
    648         '74.0.3693.6',
    649         '73.0.3683.20',
    650         '72.0.3626.91',
    651         '74.0.3693.5',
    652         '74.0.3693.4',
    653         '74.0.3693.3',
    654         '74.0.3693.2',
    655         '73.0.3683.19',
    656         '74.0.3693.1',
    657         '74.0.3693.0',
    658         '73.0.3683.18',
    659         '72.0.3626.90',
    660         '74.0.3692.1',
    661         '74.0.3692.0',
    662         '73.0.3683.17',
    663         '72.0.3626.89',
    664         '74.0.3687.3',
    665         '74.0.3691.1',
    666         '74.0.3691.0',
    667         '73.0.3683.16',
    668         '72.0.3626.88',
    669         '72.0.3626.87',
    670         '73.0.3683.15',
    671         '74.0.3690.1',
    672         '74.0.3690.0',
    673         '73.0.3683.14',
    674         '72.0.3626.86',
    675         '73.0.3683.13',
    676         '73.0.3683.12',
    677         '74.0.3689.1',
    678         '74.0.3689.0',
    679         '73.0.3683.11',
    680         '72.0.3626.85',
    681         '73.0.3683.10',
    682         '72.0.3626.84',
    683         '73.0.3683.9',
    684         '74.0.3688.1',
    685         '74.0.3688.0',
    686         '73.0.3683.8',
    687         '72.0.3626.83',
    688         '74.0.3687.2',
    689         '74.0.3687.1',
    690         '74.0.3687.0',
    691         '73.0.3683.7',
    692         '72.0.3626.82',
    693         '74.0.3686.4',
    694         '72.0.3626.81',
    695         '74.0.3686.3',
    696         '74.0.3686.2',
    697         '74.0.3686.1',
    698         '74.0.3686.0',
    699         '73.0.3683.6',
    700         '72.0.3626.80',
    701         '74.0.3685.1',
    702         '74.0.3685.0',
    703         '73.0.3683.5',
    704         '72.0.3626.79',
    705         '74.0.3684.1',
    706         '74.0.3684.0',
    707         '73.0.3683.4',
    708         '72.0.3626.78',
    709         '72.0.3626.77',
    710         '73.0.3683.3',
    711         '73.0.3683.2',
    712         '72.0.3626.76',
    713         '73.0.3683.1',
    714         '73.0.3683.0',
    715         '72.0.3626.75',
    716         '71.0.3578.141',
    717         '73.0.3682.1',
    718         '73.0.3682.0',
    719         '72.0.3626.74',
    720         '71.0.3578.140',
    721         '73.0.3681.4',
    722         '73.0.3681.3',
    723         '73.0.3681.2',
    724         '73.0.3681.1',
    725         '73.0.3681.0',
    726         '72.0.3626.73',
    727         '71.0.3578.139',
    728         '72.0.3626.72',
    729         '72.0.3626.71',
    730         '73.0.3680.1',
    731         '73.0.3680.0',
    732         '72.0.3626.70',
    733         '71.0.3578.138',
    734         '73.0.3678.2',
    735         '73.0.3679.1',
    736         '73.0.3679.0',
    737         '72.0.3626.69',
    738         '71.0.3578.137',
    739         '73.0.3678.1',
    740         '73.0.3678.0',
    741         '71.0.3578.136',
    742         '73.0.3677.1',
    743         '73.0.3677.0',
    744         '72.0.3626.68',
    745         '72.0.3626.67',
    746         '71.0.3578.135',
    747         '73.0.3676.1',
    748         '73.0.3676.0',
    749         '73.0.3674.2',
    750         '72.0.3626.66',
    751         '71.0.3578.134',
    752         '73.0.3674.1',
    753         '73.0.3674.0',
    754         '72.0.3626.65',
    755         '71.0.3578.133',
    756         '73.0.3673.2',
    757         '73.0.3673.1',
    758         '73.0.3673.0',
    759         '72.0.3626.64',
    760         '71.0.3578.132',
    761         '72.0.3626.63',
    762         '72.0.3626.62',
    763         '72.0.3626.61',
    764         '72.0.3626.60',
    765         '73.0.3672.1',
    766         '73.0.3672.0',
    767         '72.0.3626.59',
    768         '71.0.3578.131',
    769         '73.0.3671.3',
    770         '73.0.3671.2',
    771         '73.0.3671.1',
    772         '73.0.3671.0',
    773         '72.0.3626.58',
    774         '71.0.3578.130',
    775         '73.0.3670.1',
    776         '73.0.3670.0',
    777         '72.0.3626.57',
    778         '71.0.3578.129',
    779         '73.0.3669.1',
    780         '73.0.3669.0',
    781         '72.0.3626.56',
    782         '71.0.3578.128',
    783         '73.0.3668.2',
    784         '73.0.3668.1',
    785         '73.0.3668.0',
    786         '72.0.3626.55',
    787         '71.0.3578.127',
    788         '73.0.3667.2',
    789         '73.0.3667.1',
    790         '73.0.3667.0',
    791         '72.0.3626.54',
    792         '71.0.3578.126',
    793         '73.0.3666.1',
    794         '73.0.3666.0',
    795         '72.0.3626.53',
    796         '71.0.3578.125',
    797         '73.0.3665.4',
    798         '73.0.3665.3',
    799         '72.0.3626.52',
    800         '73.0.3665.2',
    801         '73.0.3664.4',
    802         '73.0.3665.1',
    803         '73.0.3665.0',
    804         '72.0.3626.51',
    805         '71.0.3578.124',
    806         '72.0.3626.50',
    807         '73.0.3664.3',
    808         '73.0.3664.2',
    809         '73.0.3664.1',
    810         '73.0.3664.0',
    811         '73.0.3663.2',
    812         '72.0.3626.49',
    813         '71.0.3578.123',
    814         '73.0.3663.1',
    815         '73.0.3663.0',
    816         '72.0.3626.48',
    817         '71.0.3578.122',
    818         '73.0.3662.1',
    819         '73.0.3662.0',
    820         '72.0.3626.47',
    821         '71.0.3578.121',
    822         '73.0.3661.1',
    823         '72.0.3626.46',
    824         '73.0.3661.0',
    825         '72.0.3626.45',
    826         '71.0.3578.120',
    827         '73.0.3660.2',
    828         '73.0.3660.1',
    829         '73.0.3660.0',
    830         '72.0.3626.44',
    831         '71.0.3578.119',
    832         '73.0.3659.1',
    833         '73.0.3659.0',
    834         '72.0.3626.43',
    835         '71.0.3578.118',
    836         '73.0.3658.1',
    837         '73.0.3658.0',
    838         '72.0.3626.42',
    839         '71.0.3578.117',
    840         '73.0.3657.1',
    841         '73.0.3657.0',
    842         '72.0.3626.41',
    843         '71.0.3578.116',
    844         '73.0.3656.1',
    845         '73.0.3656.0',
    846         '72.0.3626.40',
    847         '71.0.3578.115',
    848         '73.0.3655.1',
    849         '73.0.3655.0',
    850         '72.0.3626.39',
    851         '71.0.3578.114',
    852         '73.0.3654.1',
    853         '73.0.3654.0',
    854         '72.0.3626.38',
    855         '71.0.3578.113',
    856         '73.0.3653.1',
    857         '73.0.3653.0',
    858         '72.0.3626.37',
    859         '71.0.3578.112',
    860         '73.0.3652.1',
    861         '73.0.3652.0',
    862         '72.0.3626.36',
    863         '71.0.3578.111',
    864         '73.0.3651.1',
    865         '73.0.3651.0',
    866         '72.0.3626.35',
    867         '71.0.3578.110',
    868         '73.0.3650.1',
    869         '73.0.3650.0',
    870         '72.0.3626.34',
    871         '71.0.3578.109',
    872         '73.0.3649.1',
    873         '73.0.3649.0',
    874         '72.0.3626.33',
    875         '71.0.3578.108',
    876         '73.0.3648.2',
    877         '73.0.3648.1',
    878         '73.0.3648.0',
    879         '72.0.3626.32',
    880         '71.0.3578.107',
    881         '73.0.3647.2',
    882         '73.0.3647.1',
    883         '73.0.3647.0',
    884         '72.0.3626.31',
    885         '71.0.3578.106',
    886         '73.0.3635.3',
    887         '73.0.3646.2',
    888         '73.0.3646.1',
    889         '73.0.3646.0',
    890         '72.0.3626.30',
    891         '71.0.3578.105',
    892         '72.0.3626.29',
    893         '73.0.3645.2',
    894         '73.0.3645.1',
    895         '73.0.3645.0',
    896         '72.0.3626.28',
    897         '71.0.3578.104',
    898         '72.0.3626.27',
    899         '72.0.3626.26',
    900         '72.0.3626.25',
    901         '72.0.3626.24',
    902         '73.0.3644.0',
    903         '73.0.3643.2',
    904         '72.0.3626.23',
    905         '71.0.3578.103',
    906         '73.0.3643.1',
    907         '73.0.3643.0',
    908         '72.0.3626.22',
    909         '71.0.3578.102',
    910         '73.0.3642.1',
    911         '73.0.3642.0',
    912         '72.0.3626.21',
    913         '71.0.3578.101',
    914         '73.0.3641.1',
    915         '73.0.3641.0',
    916         '72.0.3626.20',
    917         '71.0.3578.100',
    918         '72.0.3626.19',
    919         '73.0.3640.1',
    920         '73.0.3640.0',
    921         '72.0.3626.18',
    922         '73.0.3639.1',
    923         '71.0.3578.99',
    924         '73.0.3639.0',
    925         '72.0.3626.17',
    926         '73.0.3638.2',
    927         '72.0.3626.16',
    928         '73.0.3638.1',
    929         '73.0.3638.0',
    930         '72.0.3626.15',
    931         '71.0.3578.98',
    932         '73.0.3635.2',
    933         '71.0.3578.97',
    934         '73.0.3637.1',
    935         '73.0.3637.0',
    936         '72.0.3626.14',
    937         '71.0.3578.96',
    938         '71.0.3578.95',
    939         '72.0.3626.13',
    940         '71.0.3578.94',
    941         '73.0.3636.2',
    942         '71.0.3578.93',
    943         '73.0.3636.1',
    944         '73.0.3636.0',
    945         '72.0.3626.12',
    946         '71.0.3578.92',
    947         '73.0.3635.1',
    948         '73.0.3635.0',
    949         '72.0.3626.11',
    950         '71.0.3578.91',
    951         '73.0.3634.2',
    952         '73.0.3634.1',
    953         '73.0.3634.0',
    954         '72.0.3626.10',
    955         '71.0.3578.90',
    956         '71.0.3578.89',
    957         '73.0.3633.2',
    958         '73.0.3633.1',
    959         '73.0.3633.0',
    960         '72.0.3610.4',
    961         '72.0.3626.9',
    962         '71.0.3578.88',
    963         '73.0.3632.5',
    964         '73.0.3632.4',
    965         '73.0.3632.3',
    966         '73.0.3632.2',
    967         '73.0.3632.1',
    968         '73.0.3632.0',
    969         '72.0.3626.8',
    970         '71.0.3578.87',
    971         '73.0.3631.2',
    972         '73.0.3631.1',
    973         '73.0.3631.0',
    974         '72.0.3626.7',
    975         '71.0.3578.86',
    976         '72.0.3626.6',
    977         '73.0.3630.1',
    978         '73.0.3630.0',
    979         '72.0.3626.5',
    980         '71.0.3578.85',
    981         '72.0.3626.4',
    982         '73.0.3628.3',
    983         '73.0.3628.2',
    984         '73.0.3629.1',
    985         '73.0.3629.0',
    986         '72.0.3626.3',
    987         '71.0.3578.84',
    988         '73.0.3628.1',
    989         '73.0.3628.0',
    990         '71.0.3578.83',
    991         '73.0.3627.1',
    992         '73.0.3627.0',
    993         '72.0.3626.2',
    994         '71.0.3578.82',
    995         '71.0.3578.81',
    996         '71.0.3578.80',
    997         '72.0.3626.1',
    998         '72.0.3626.0',
    999         '71.0.3578.79',
   1000         '70.0.3538.124',
   1001         '71.0.3578.78',
   1002         '72.0.3623.4',
   1003         '72.0.3625.2',
   1004         '72.0.3625.1',
   1005         '72.0.3625.0',
   1006         '71.0.3578.77',
   1007         '70.0.3538.123',
   1008         '72.0.3624.4',
   1009         '72.0.3624.3',
   1010         '72.0.3624.2',
   1011         '71.0.3578.76',
   1012         '72.0.3624.1',
   1013         '72.0.3624.0',
   1014         '72.0.3623.3',
   1015         '71.0.3578.75',
   1016         '70.0.3538.122',
   1017         '71.0.3578.74',
   1018         '72.0.3623.2',
   1019         '72.0.3610.3',
   1020         '72.0.3623.1',
   1021         '72.0.3623.0',
   1022         '72.0.3622.3',
   1023         '72.0.3622.2',
   1024         '71.0.3578.73',
   1025         '70.0.3538.121',
   1026         '72.0.3622.1',
   1027         '72.0.3622.0',
   1028         '71.0.3578.72',
   1029         '70.0.3538.120',
   1030         '72.0.3621.1',
   1031         '72.0.3621.0',
   1032         '71.0.3578.71',
   1033         '70.0.3538.119',
   1034         '72.0.3620.1',
   1035         '72.0.3620.0',
   1036         '71.0.3578.70',
   1037         '70.0.3538.118',
   1038         '71.0.3578.69',
   1039         '72.0.3619.1',
   1040         '72.0.3619.0',
   1041         '71.0.3578.68',
   1042         '70.0.3538.117',
   1043         '71.0.3578.67',
   1044         '72.0.3618.1',
   1045         '72.0.3618.0',
   1046         '71.0.3578.66',
   1047         '70.0.3538.116',
   1048         '72.0.3617.1',
   1049         '72.0.3617.0',
   1050         '71.0.3578.65',
   1051         '70.0.3538.115',
   1052         '72.0.3602.3',
   1053         '71.0.3578.64',
   1054         '72.0.3616.1',
   1055         '72.0.3616.0',
   1056         '71.0.3578.63',
   1057         '70.0.3538.114',
   1058         '71.0.3578.62',
   1059         '72.0.3615.1',
   1060         '72.0.3615.0',
   1061         '71.0.3578.61',
   1062         '70.0.3538.113',
   1063         '72.0.3614.1',
   1064         '72.0.3614.0',
   1065         '71.0.3578.60',
   1066         '70.0.3538.112',
   1067         '72.0.3613.1',
   1068         '72.0.3613.0',
   1069         '71.0.3578.59',
   1070         '70.0.3538.111',
   1071         '72.0.3612.2',
   1072         '72.0.3612.1',
   1073         '72.0.3612.0',
   1074         '70.0.3538.110',
   1075         '71.0.3578.58',
   1076         '70.0.3538.109',
   1077         '72.0.3611.2',
   1078         '72.0.3611.1',
   1079         '72.0.3611.0',
   1080         '71.0.3578.57',
   1081         '70.0.3538.108',
   1082         '72.0.3610.2',
   1083         '71.0.3578.56',
   1084         '71.0.3578.55',
   1085         '72.0.3610.1',
   1086         '72.0.3610.0',
   1087         '71.0.3578.54',
   1088         '70.0.3538.107',
   1089         '71.0.3578.53',
   1090         '72.0.3609.3',
   1091         '71.0.3578.52',
   1092         '72.0.3609.2',
   1093         '71.0.3578.51',
   1094         '72.0.3608.5',
   1095         '72.0.3609.1',
   1096         '72.0.3609.0',
   1097         '71.0.3578.50',
   1098         '70.0.3538.106',
   1099         '72.0.3608.4',
   1100         '72.0.3608.3',
   1101         '72.0.3608.2',
   1102         '71.0.3578.49',
   1103         '72.0.3608.1',
   1104         '72.0.3608.0',
   1105         '70.0.3538.105',
   1106         '71.0.3578.48',
   1107         '72.0.3607.1',
   1108         '72.0.3607.0',
   1109         '71.0.3578.47',
   1110         '70.0.3538.104',
   1111         '72.0.3606.2',
   1112         '72.0.3606.1',
   1113         '72.0.3606.0',
   1114         '71.0.3578.46',
   1115         '70.0.3538.103',
   1116         '70.0.3538.102',
   1117         '72.0.3605.3',
   1118         '72.0.3605.2',
   1119         '72.0.3605.1',
   1120         '72.0.3605.0',
   1121         '71.0.3578.45',
   1122         '70.0.3538.101',
   1123         '71.0.3578.44',
   1124         '71.0.3578.43',
   1125         '70.0.3538.100',
   1126         '70.0.3538.99',
   1127         '71.0.3578.42',
   1128         '72.0.3604.1',
   1129         '72.0.3604.0',
   1130         '71.0.3578.41',
   1131         '70.0.3538.98',
   1132         '71.0.3578.40',
   1133         '72.0.3603.2',
   1134         '72.0.3603.1',
   1135         '72.0.3603.0',
   1136         '71.0.3578.39',
   1137         '70.0.3538.97',
   1138         '72.0.3602.2',
   1139         '71.0.3578.38',
   1140         '71.0.3578.37',
   1141         '72.0.3602.1',
   1142         '72.0.3602.0',
   1143         '71.0.3578.36',
   1144         '70.0.3538.96',
   1145         '72.0.3601.1',
   1146         '72.0.3601.0',
   1147         '71.0.3578.35',
   1148         '70.0.3538.95',
   1149         '72.0.3600.1',
   1150         '72.0.3600.0',
   1151         '71.0.3578.34',
   1152         '70.0.3538.94',
   1153         '72.0.3599.3',
   1154         '72.0.3599.2',
   1155         '72.0.3599.1',
   1156         '72.0.3599.0',
   1157         '71.0.3578.33',
   1158         '70.0.3538.93',
   1159         '72.0.3598.1',
   1160         '72.0.3598.0',
   1161         '71.0.3578.32',
   1162         '70.0.3538.87',
   1163         '72.0.3597.1',
   1164         '72.0.3597.0',
   1165         '72.0.3596.2',
   1166         '71.0.3578.31',
   1167         '70.0.3538.86',
   1168         '71.0.3578.30',
   1169         '71.0.3578.29',
   1170         '72.0.3596.1',
   1171         '72.0.3596.0',
   1172         '71.0.3578.28',
   1173         '70.0.3538.85',
   1174         '72.0.3595.2',
   1175         '72.0.3591.3',
   1176         '72.0.3595.1',
   1177         '72.0.3595.0',
   1178         '71.0.3578.27',
   1179         '70.0.3538.84',
   1180         '72.0.3594.1',
   1181         '72.0.3594.0',
   1182         '71.0.3578.26',
   1183         '70.0.3538.83',
   1184         '72.0.3593.2',
   1185         '72.0.3593.1',
   1186         '72.0.3593.0',
   1187         '71.0.3578.25',
   1188         '70.0.3538.82',
   1189         '72.0.3589.3',
   1190         '72.0.3592.2',
   1191         '72.0.3592.1',
   1192         '72.0.3592.0',
   1193         '71.0.3578.24',
   1194         '72.0.3589.2',
   1195         '70.0.3538.81',
   1196         '70.0.3538.80',
   1197         '72.0.3591.2',
   1198         '72.0.3591.1',
   1199         '72.0.3591.0',
   1200         '71.0.3578.23',
   1201         '70.0.3538.79',
   1202         '71.0.3578.22',
   1203         '72.0.3590.1',
   1204         '72.0.3590.0',
   1205         '71.0.3578.21',
   1206         '70.0.3538.78',
   1207         '70.0.3538.77',
   1208         '72.0.3589.1',
   1209         '72.0.3589.0',
   1210         '71.0.3578.20',
   1211         '70.0.3538.76',
   1212         '71.0.3578.19',
   1213         '70.0.3538.75',
   1214         '72.0.3588.1',
   1215         '72.0.3588.0',
   1216         '71.0.3578.18',
   1217         '70.0.3538.74',
   1218         '72.0.3586.2',
   1219         '72.0.3587.0',
   1220         '71.0.3578.17',
   1221         '70.0.3538.73',
   1222         '72.0.3586.1',
   1223         '72.0.3586.0',
   1224         '71.0.3578.16',
   1225         '70.0.3538.72',
   1226         '72.0.3585.1',
   1227         '72.0.3585.0',
   1228         '71.0.3578.15',
   1229         '70.0.3538.71',
   1230         '71.0.3578.14',
   1231         '72.0.3584.1',
   1232         '72.0.3584.0',
   1233         '71.0.3578.13',
   1234         '70.0.3538.70',
   1235         '72.0.3583.2',
   1236         '71.0.3578.12',
   1237         '72.0.3583.1',
   1238         '72.0.3583.0',
   1239         '71.0.3578.11',
   1240         '70.0.3538.69',
   1241         '71.0.3578.10',
   1242         '72.0.3582.0',
   1243         '72.0.3581.4',
   1244         '71.0.3578.9',
   1245         '70.0.3538.67',
   1246         '72.0.3581.3',
   1247         '72.0.3581.2',
   1248         '72.0.3581.1',
   1249         '72.0.3581.0',
   1250         '71.0.3578.8',
   1251         '70.0.3538.66',
   1252         '72.0.3580.1',
   1253         '72.0.3580.0',
   1254         '71.0.3578.7',
   1255         '70.0.3538.65',
   1256         '71.0.3578.6',
   1257         '72.0.3579.1',
   1258         '72.0.3579.0',
   1259         '71.0.3578.5',
   1260         '70.0.3538.64',
   1261         '71.0.3578.4',
   1262         '71.0.3578.3',
   1263         '71.0.3578.2',
   1264         '71.0.3578.1',
   1265         '71.0.3578.0',
   1266         '70.0.3538.63',
   1267         '69.0.3497.128',
   1268         '70.0.3538.62',
   1269         '70.0.3538.61',
   1270         '70.0.3538.60',
   1271         '70.0.3538.59',
   1272         '71.0.3577.1',
   1273         '71.0.3577.0',
   1274         '70.0.3538.58',
   1275         '69.0.3497.127',
   1276         '71.0.3576.2',
   1277         '71.0.3576.1',
   1278         '71.0.3576.0',
   1279         '70.0.3538.57',
   1280         '70.0.3538.56',
   1281         '71.0.3575.2',
   1282         '70.0.3538.55',
   1283         '69.0.3497.126',
   1284         '70.0.3538.54',
   1285         '71.0.3575.1',
   1286         '71.0.3575.0',
   1287         '71.0.3574.1',
   1288         '71.0.3574.0',
   1289         '70.0.3538.53',
   1290         '69.0.3497.125',
   1291         '70.0.3538.52',
   1292         '71.0.3573.1',
   1293         '71.0.3573.0',
   1294         '70.0.3538.51',
   1295         '69.0.3497.124',
   1296         '71.0.3572.1',
   1297         '71.0.3572.0',
   1298         '70.0.3538.50',
   1299         '69.0.3497.123',
   1300         '71.0.3571.2',
   1301         '70.0.3538.49',
   1302         '69.0.3497.122',
   1303         '71.0.3571.1',
   1304         '71.0.3571.0',
   1305         '70.0.3538.48',
   1306         '69.0.3497.121',
   1307         '71.0.3570.1',
   1308         '71.0.3570.0',
   1309         '70.0.3538.47',
   1310         '69.0.3497.120',
   1311         '71.0.3568.2',
   1312         '71.0.3569.1',
   1313         '71.0.3569.0',
   1314         '70.0.3538.46',
   1315         '69.0.3497.119',
   1316         '70.0.3538.45',
   1317         '71.0.3568.1',
   1318         '71.0.3568.0',
   1319         '70.0.3538.44',
   1320         '69.0.3497.118',
   1321         '70.0.3538.43',
   1322         '70.0.3538.42',
   1323         '71.0.3567.1',
   1324         '71.0.3567.0',
   1325         '70.0.3538.41',
   1326         '69.0.3497.117',
   1327         '71.0.3566.1',
   1328         '71.0.3566.0',
   1329         '70.0.3538.40',
   1330         '69.0.3497.116',
   1331         '71.0.3565.1',
   1332         '71.0.3565.0',
   1333         '70.0.3538.39',
   1334         '69.0.3497.115',
   1335         '71.0.3564.1',
   1336         '71.0.3564.0',
   1337         '70.0.3538.38',
   1338         '69.0.3497.114',
   1339         '71.0.3563.0',
   1340         '71.0.3562.2',
   1341         '70.0.3538.37',
   1342         '69.0.3497.113',
   1343         '70.0.3538.36',
   1344         '70.0.3538.35',
   1345         '71.0.3562.1',
   1346         '71.0.3562.0',
   1347         '70.0.3538.34',
   1348         '69.0.3497.112',
   1349         '70.0.3538.33',
   1350         '71.0.3561.1',
   1351         '71.0.3561.0',
   1352         '70.0.3538.32',
   1353         '69.0.3497.111',
   1354         '71.0.3559.6',
   1355         '71.0.3560.1',
   1356         '71.0.3560.0',
   1357         '71.0.3559.5',
   1358         '71.0.3559.4',
   1359         '70.0.3538.31',
   1360         '69.0.3497.110',
   1361         '71.0.3559.3',
   1362         '70.0.3538.30',
   1363         '69.0.3497.109',
   1364         '71.0.3559.2',
   1365         '71.0.3559.1',
   1366         '71.0.3559.0',
   1367         '70.0.3538.29',
   1368         '69.0.3497.108',
   1369         '71.0.3558.2',
   1370         '71.0.3558.1',
   1371         '71.0.3558.0',
   1372         '70.0.3538.28',
   1373         '69.0.3497.107',
   1374         '71.0.3557.2',
   1375         '71.0.3557.1',
   1376         '71.0.3557.0',
   1377         '70.0.3538.27',
   1378         '69.0.3497.106',
   1379         '71.0.3554.4',
   1380         '70.0.3538.26',
   1381         '71.0.3556.1',
   1382         '71.0.3556.0',
   1383         '70.0.3538.25',
   1384         '71.0.3554.3',
   1385         '69.0.3497.105',
   1386         '71.0.3554.2',
   1387         '70.0.3538.24',
   1388         '69.0.3497.104',
   1389         '71.0.3555.2',
   1390         '70.0.3538.23',
   1391         '71.0.3555.1',
   1392         '71.0.3555.0',
   1393         '70.0.3538.22',
   1394         '69.0.3497.103',
   1395         '71.0.3554.1',
   1396         '71.0.3554.0',
   1397         '70.0.3538.21',
   1398         '69.0.3497.102',
   1399         '71.0.3553.3',
   1400         '70.0.3538.20',
   1401         '69.0.3497.101',
   1402         '71.0.3553.2',
   1403         '69.0.3497.100',
   1404         '71.0.3553.1',
   1405         '71.0.3553.0',
   1406         '70.0.3538.19',
   1407         '69.0.3497.99',
   1408         '69.0.3497.98',
   1409         '69.0.3497.97',
   1410         '71.0.3552.6',
   1411         '71.0.3552.5',
   1412         '71.0.3552.4',
   1413         '71.0.3552.3',
   1414         '71.0.3552.2',
   1415         '71.0.3552.1',
   1416         '71.0.3552.0',
   1417         '70.0.3538.18',
   1418         '69.0.3497.96',
   1419         '71.0.3551.3',
   1420         '71.0.3551.2',
   1421         '71.0.3551.1',
   1422         '71.0.3551.0',
   1423         '70.0.3538.17',
   1424         '69.0.3497.95',
   1425         '71.0.3550.3',
   1426         '71.0.3550.2',
   1427         '71.0.3550.1',
   1428         '71.0.3550.0',
   1429         '70.0.3538.16',
   1430         '69.0.3497.94',
   1431         '71.0.3549.1',
   1432         '71.0.3549.0',
   1433         '70.0.3538.15',
   1434         '69.0.3497.93',
   1435         '69.0.3497.92',
   1436         '71.0.3548.1',
   1437         '71.0.3548.0',
   1438         '70.0.3538.14',
   1439         '69.0.3497.91',
   1440         '71.0.3547.1',
   1441         '71.0.3547.0',
   1442         '70.0.3538.13',
   1443         '69.0.3497.90',
   1444         '71.0.3546.2',
   1445         '69.0.3497.89',
   1446         '71.0.3546.1',
   1447         '71.0.3546.0',
   1448         '70.0.3538.12',
   1449         '69.0.3497.88',
   1450         '71.0.3545.4',
   1451         '71.0.3545.3',
   1452         '71.0.3545.2',
   1453         '71.0.3545.1',
   1454         '71.0.3545.0',
   1455         '70.0.3538.11',
   1456         '69.0.3497.87',
   1457         '71.0.3544.5',
   1458         '71.0.3544.4',
   1459         '71.0.3544.3',
   1460         '71.0.3544.2',
   1461         '71.0.3544.1',
   1462         '71.0.3544.0',
   1463         '69.0.3497.86',
   1464         '70.0.3538.10',
   1465         '69.0.3497.85',
   1466         '70.0.3538.9',
   1467         '69.0.3497.84',
   1468         '71.0.3543.4',
   1469         '70.0.3538.8',
   1470         '71.0.3543.3',
   1471         '71.0.3543.2',
   1472         '71.0.3543.1',
   1473         '71.0.3543.0',
   1474         '70.0.3538.7',
   1475         '69.0.3497.83',
   1476         '71.0.3542.2',
   1477         '71.0.3542.1',
   1478         '71.0.3542.0',
   1479         '70.0.3538.6',
   1480         '69.0.3497.82',
   1481         '69.0.3497.81',
   1482         '71.0.3541.1',
   1483         '71.0.3541.0',
   1484         '70.0.3538.5',
   1485         '69.0.3497.80',
   1486         '71.0.3540.1',
   1487         '71.0.3540.0',
   1488         '70.0.3538.4',
   1489         '69.0.3497.79',
   1490         '70.0.3538.3',
   1491         '71.0.3539.1',
   1492         '71.0.3539.0',
   1493         '69.0.3497.78',
   1494         '68.0.3440.134',
   1495         '69.0.3497.77',
   1496         '70.0.3538.2',
   1497         '70.0.3538.1',
   1498         '70.0.3538.0',
   1499         '69.0.3497.76',
   1500         '68.0.3440.133',
   1501         '69.0.3497.75',
   1502         '70.0.3537.2',
   1503         '70.0.3537.1',
   1504         '70.0.3537.0',
   1505         '69.0.3497.74',
   1506         '68.0.3440.132',
   1507         '70.0.3536.0',
   1508         '70.0.3535.5',
   1509         '70.0.3535.4',
   1510         '70.0.3535.3',
   1511         '69.0.3497.73',
   1512         '68.0.3440.131',
   1513         '70.0.3532.8',
   1514         '70.0.3532.7',
   1515         '69.0.3497.72',
   1516         '69.0.3497.71',
   1517         '70.0.3535.2',
   1518         '70.0.3535.1',
   1519         '70.0.3535.0',
   1520         '69.0.3497.70',
   1521         '68.0.3440.130',
   1522         '69.0.3497.69',
   1523         '68.0.3440.129',
   1524         '70.0.3534.4',
   1525         '70.0.3534.3',
   1526         '70.0.3534.2',
   1527         '70.0.3534.1',
   1528         '70.0.3534.0',
   1529         '69.0.3497.68',
   1530         '68.0.3440.128',
   1531         '70.0.3533.2',
   1532         '70.0.3533.1',
   1533         '70.0.3533.0',
   1534         '69.0.3497.67',
   1535         '68.0.3440.127',
   1536         '70.0.3532.6',
   1537         '70.0.3532.5',
   1538         '70.0.3532.4',
   1539         '69.0.3497.66',
   1540         '68.0.3440.126',
   1541         '70.0.3532.3',
   1542         '70.0.3532.2',
   1543         '70.0.3532.1',
   1544         '69.0.3497.60',
   1545         '69.0.3497.65',
   1546         '69.0.3497.64',
   1547         '70.0.3532.0',
   1548         '70.0.3531.0',
   1549         '70.0.3530.4',
   1550         '70.0.3530.3',
   1551         '70.0.3530.2',
   1552         '69.0.3497.58',
   1553         '68.0.3440.125',
   1554         '69.0.3497.57',
   1555         '69.0.3497.56',
   1556         '69.0.3497.55',
   1557         '69.0.3497.54',
   1558         '70.0.3530.1',
   1559         '70.0.3530.0',
   1560         '69.0.3497.53',
   1561         '68.0.3440.124',
   1562         '69.0.3497.52',
   1563         '70.0.3529.3',
   1564         '70.0.3529.2',
   1565         '70.0.3529.1',
   1566         '70.0.3529.0',
   1567         '69.0.3497.51',
   1568         '70.0.3528.4',
   1569         '68.0.3440.123',
   1570         '70.0.3528.3',
   1571         '70.0.3528.2',
   1572         '70.0.3528.1',
   1573         '70.0.3528.0',
   1574         '69.0.3497.50',
   1575         '68.0.3440.122',
   1576         '70.0.3527.1',
   1577         '70.0.3527.0',
   1578         '69.0.3497.49',
   1579         '68.0.3440.121',
   1580         '70.0.3526.1',
   1581         '70.0.3526.0',
   1582         '68.0.3440.120',
   1583         '69.0.3497.48',
   1584         '69.0.3497.47',
   1585         '68.0.3440.119',
   1586         '68.0.3440.118',
   1587         '70.0.3525.5',
   1588         '70.0.3525.4',
   1589         '70.0.3525.3',
   1590         '68.0.3440.117',
   1591         '69.0.3497.46',
   1592         '70.0.3525.2',
   1593         '70.0.3525.1',
   1594         '70.0.3525.0',
   1595         '69.0.3497.45',
   1596         '68.0.3440.116',
   1597         '70.0.3524.4',
   1598         '70.0.3524.3',
   1599         '69.0.3497.44',
   1600         '70.0.3524.2',
   1601         '70.0.3524.1',
   1602         '70.0.3524.0',
   1603         '70.0.3523.2',
   1604         '69.0.3497.43',
   1605         '68.0.3440.115',
   1606         '70.0.3505.9',
   1607         '69.0.3497.42',
   1608         '70.0.3505.8',
   1609         '70.0.3523.1',
   1610         '70.0.3523.0',
   1611         '69.0.3497.41',
   1612         '68.0.3440.114',
   1613         '70.0.3505.7',
   1614         '69.0.3497.40',
   1615         '70.0.3522.1',
   1616         '70.0.3522.0',
   1617         '70.0.3521.2',
   1618         '69.0.3497.39',
   1619         '68.0.3440.113',
   1620         '70.0.3505.6',
   1621         '70.0.3521.1',
   1622         '70.0.3521.0',
   1623         '69.0.3497.38',
   1624         '68.0.3440.112',
   1625         '70.0.3520.1',
   1626         '70.0.3520.0',
   1627         '69.0.3497.37',
   1628         '68.0.3440.111',
   1629         '70.0.3519.3',
   1630         '70.0.3519.2',
   1631         '70.0.3519.1',
   1632         '70.0.3519.0',
   1633         '69.0.3497.36',
   1634         '68.0.3440.110',
   1635         '70.0.3518.1',
   1636         '70.0.3518.0',
   1637         '69.0.3497.35',
   1638         '69.0.3497.34',
   1639         '68.0.3440.109',
   1640         '70.0.3517.1',
   1641         '70.0.3517.0',
   1642         '69.0.3497.33',
   1643         '68.0.3440.108',
   1644         '69.0.3497.32',
   1645         '70.0.3516.3',
   1646         '70.0.3516.2',
   1647         '70.0.3516.1',
   1648         '70.0.3516.0',
   1649         '69.0.3497.31',
   1650         '68.0.3440.107',
   1651         '70.0.3515.4',
   1652         '68.0.3440.106',
   1653         '70.0.3515.3',
   1654         '70.0.3515.2',
   1655         '70.0.3515.1',
   1656         '70.0.3515.0',
   1657         '69.0.3497.30',
   1658         '68.0.3440.105',
   1659         '68.0.3440.104',
   1660         '70.0.3514.2',
   1661         '70.0.3514.1',
   1662         '70.0.3514.0',
   1663         '69.0.3497.29',
   1664         '68.0.3440.103',
   1665         '70.0.3513.1',
   1666         '70.0.3513.0',
   1667         '69.0.3497.28',
   1668     )
   1669     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
   1670 
   1671 
   1672 std_headers = {
   1673     'User-Agent': random_user_agent(),
   1674     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
   1675     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
   1676     'Accept-Encoding': 'gzip, deflate',
   1677     'Accept-Language': 'en-us,en;q=0.5',
   1678 }
   1679 
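# --- Editor's illustrative sketch (not part of the original utils.py) ---
# std_headers is the baseline header set built on a randomized Chrome
# User-Agent. Callers typically copy it and overlay request-specific headers
# before constructing a urllib request. The helper below is hypothetical and
# for demonstration only:
def _demo_build_request(url, extra_headers=None):
    headers = dict(std_headers)
    headers.update(extra_headers or {})
    return compat_urllib_request.Request(url, headers=headers)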
   1680 
   1681 USER_AGENTS = {
   1682     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
   1683 }
   1684 
   1685 
   1686 NO_DEFAULT = object()
   1687 
   1688 ENGLISH_MONTH_NAMES = [
   1689     'January', 'February', 'March', 'April', 'May', 'June',
   1690     'July', 'August', 'September', 'October', 'November', 'December']
   1691 
   1692 MONTH_NAMES = {
   1693     'en': ENGLISH_MONTH_NAMES,
   1694     'fr': [
   1695         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
   1696         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
   1697 }
   1698 
   1699 KNOWN_EXTENSIONS = (
   1700     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
   1701     'flv', 'f4v', 'f4a', 'f4b',
   1702     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
   1703     'mkv', 'mka', 'mk3d',
   1704     'avi', 'divx',
   1705     'mov',
   1706     'asf', 'wmv', 'wma',
   1707     '3gp', '3g2',
   1708     'mp3',
   1709     'flac',
   1710     'ape',
   1711     'wav',
   1712     'f4f', 'f4m', 'm3u8', 'smil')
   1713 
   1714 # needed for sanitizing filenames in restricted mode
   1715 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
   1716                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
   1717                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
   1718 
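# --- Editor's illustrative sketch (not part of the original utils.py) ---
# ACCENT_CHARS maps each accented character to an ASCII replacement, which is
# what restricted-mode filename sanitizing relies on. A minimal
# transliteration pass, for demonstration only:
def _demo_strip_accents(s):
    return ''.join(ACCENT_CHARS.get(ch, ch) for ch in s)
# e.g. _demo_strip_accents('Motörhead – Überdosis') maps 'ö' -> 'o' and
# 'Ü' -> 'U' while leaving unmapped characters (like the dash) untouched.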
   1719 DATE_FORMATS = (
   1720     '%d %B %Y',
   1721     '%d %b %Y',
   1722     '%B %d %Y',
   1723     '%B %dst %Y',
   1724     '%B %dnd %Y',
   1725     '%B %drd %Y',
   1726     '%B %dth %Y',
   1727     '%b %d %Y',
   1728     '%b %dst %Y',
   1729     '%b %dnd %Y',
   1730     '%b %drd %Y',
   1731     '%b %dth %Y',
   1732     '%b %dst %Y %I:%M',
   1733     '%b %dnd %Y %I:%M',
   1734     '%b %drd %Y %I:%M',
   1735     '%b %dth %Y %I:%M',
   1736     '%Y %m %d',
   1737     '%Y-%m-%d',
   1738     '%Y/%m/%d',
   1739     '%Y/%m/%d %H:%M',
   1740     '%Y/%m/%d %H:%M:%S',
   1741     '%Y-%m-%d %H:%M',
   1742     '%Y-%m-%d %H:%M:%S',
   1743     '%Y-%m-%d %H:%M:%S.%f',
   1744     '%d.%m.%Y %H:%M',
   1745     '%d.%m.%Y %H.%M',
   1746     '%Y-%m-%dT%H:%M:%SZ',
   1747     '%Y-%m-%dT%H:%M:%S.%fZ',
   1748     '%Y-%m-%dT%H:%M:%S.%f0Z',
   1749     '%Y-%m-%dT%H:%M:%S',
   1750     '%Y-%m-%dT%H:%M:%S.%f',
   1751     '%Y-%m-%dT%H:%M',
   1752     '%b %d %Y at %H:%M',
   1753     '%b %d %Y at %H:%M:%S',
   1754     '%B %d %Y at %H:%M',
   1755     '%B %d %Y at %H:%M:%S',
   1756 )
   1757 
   1758 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
   1759 DATE_FORMATS_DAY_FIRST.extend([
   1760     '%d-%m-%Y',
   1761     '%d.%m.%Y',
   1762     '%d.%m.%y',
   1763     '%d/%m/%Y',
   1764     '%d/%m/%y',
   1765     '%d/%m/%Y %H:%M:%S',
   1766 ])
   1767 
   1768 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
   1769 DATE_FORMATS_MONTH_FIRST.extend([
   1770     '%m-%d-%Y',
   1771     '%m.%d.%Y',
   1772     '%m/%d/%Y',
   1773     '%m/%d/%y',
   1774     '%m/%d/%Y %H:%M:%S',
   1775 ])
   1776 
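# --- Editor's illustrative sketch (not part of the original utils.py) ---
# The format tuples above are meant to be tried in order until one of them
# parses the input; the day-first/month-first variants only differ in how
# ambiguous numeric dates such as '02/03/2019' are read. A simplified loop,
# for demonstration only:
def _demo_parse_date(date_str, day_first=True):
    formats = DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
    for fmt in formats:
        try:
            return datetime.datetime.strptime(date_str, fmt)
        except ValueError:
            continue
    return None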
   1777 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
   1778 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
   1779 
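# --- Editor's illustrative sketch (not part of the original utils.py) ---
# JSON_LD_RE captures the body of a <script type="application/ld+json"> tag
# into the named group 'json_ld'. A minimal extraction helper, for
# demonstration only:
def _demo_extract_json_ld(webpage):
    mobj = re.search(JSON_LD_RE, webpage)
    return json.loads(mobj.group('json_ld')) if mobj else None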
   1780 
   1781 def preferredencoding():
   1782     """Get preferred encoding.
   1783 
   1784     Returns the best encoding scheme for the system, based on
   1785     locale.getpreferredencoding() and some further tweaks.
   1786     """
   1787     try:
   1788         pref = locale.getpreferredencoding()
   1789         'TEST'.encode(pref)
   1790     except Exception:
   1791         pref = 'UTF-8'
   1792 
   1793     return pref
   1794 
   1795 
   1796 def write_json_file(obj, fn):
   1797     """ Encode obj as JSON and write it to fn, atomically if possible """
   1798 
   1799     fn = encodeFilename(fn)
   1800     if sys.version_info < (3, 0) and sys.platform != 'win32':
   1801         encoding = get_filesystem_encoding()
   1802         # os.path.basename returns a bytes object, but NamedTemporaryFile
   1803         # will fail if the filename contains non-ASCII characters unless we
   1804         # use a unicode object
   1805         path_basename = lambda f: os.path.basename(fn).decode(encoding)
   1806         # the same for os.path.dirname
   1807         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
   1808     else:
   1809         path_basename = os.path.basename
   1810         path_dirname = os.path.dirname
   1811 
   1812     args = {
   1813         'suffix': '.tmp',
   1814         'prefix': path_basename(fn) + '.',
   1815         'dir': path_dirname(fn),
   1816         'delete': False,
   1817     }
   1818 
   1819     # In Python 2.x, json.dump expects a bytestream.
   1820     # In Python 3.x, it writes to a character stream
   1821     if sys.version_info < (3, 0):
   1822         args['mode'] = 'wb'
   1823     else:
   1824         args.update({
   1825             'mode': 'w',
   1826             'encoding': 'utf-8',
   1827         })
   1828 
   1829     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
   1830 
   1831     try:
   1832         with tf:
   1833             json.dump(obj, tf)
   1834         if sys.platform == 'win32':
   1835             # Need to remove existing file on Windows, else os.rename raises
   1836             # WindowsError or FileExistsError.
   1837             try:
   1838                 os.unlink(fn)
   1839             except OSError:
   1840                 pass
   1841         try:
   1842             mask = os.umask(0)
   1843             os.umask(mask)
   1844             os.chmod(tf.name, 0o666 & ~mask)
   1845         except OSError:
   1846             pass
   1847         os.rename(tf.name, fn)
   1848     except Exception:
   1849         try:
   1850             os.remove(tf.name)
   1851         except OSError:
   1852             pass
   1853         raise
   1854 
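# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# write_json_file() serializes obj into a NamedTemporaryFile created in the
# destination directory and then renames it into place, atomically where the
# platform allows. The filename below is made up.
def _write_json_file_example():
    info = {'id': 'abc123', 'title': 'Some video'}
    # (Re)writes example-info.json without leaving a half-written file behind
    write_json_file(info, 'example-info.json')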
   1855 
   1856 if sys.version_info >= (2, 7):
   1857     def find_xpath_attr(node, xpath, key, val=None):
   1858         """ Find the xpath xpath[@key=val] """
   1859         assert re.match(r'^[a-zA-Z_-]+$', key)
   1860         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
   1861         return node.find(expr)
   1862 else:
   1863     def find_xpath_attr(node, xpath, key, val=None):
   1864         for f in node.findall(compat_xpath(xpath)):
   1865             if key not in f.attrib:
   1866                 continue
   1867             if val is None or f.attrib.get(key) == val:
   1868                 return f
   1869         return None
   1870 
   1871 # On python2.6 the xml.etree.ElementTree.Element methods don't support
   1872 # the namespace parameter
   1873 
   1874 
   1875 def xpath_with_ns(path, ns_map):
   1876     components = [c.split(':') for c in path.split('/')]
   1877     replaced = []
   1878     for c in components:
   1879         if len(c) == 1:
   1880             replaced.append(c[0])
   1881         else:
   1882             ns, tag = c
   1883             replaced.append('{%s}%s' % (ns_map[ns], tag))
   1884     return '/'.join(replaced)
   1885 
   1886 
   1887 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
   1888     def _find_xpath(xpath):
   1889         return node.find(compat_xpath(xpath))
   1890 
   1891     if isinstance(xpath, (str, compat_str)):
   1892         n = _find_xpath(xpath)
   1893     else:
   1894         for xp in xpath:
   1895             n = _find_xpath(xp)
   1896             if n is not None:
   1897                 break
   1898 
   1899     if n is None:
   1900         if default is not NO_DEFAULT:
   1901             return default
   1902         elif fatal:
   1903             name = xpath if name is None else name
   1904             raise ExtractorError('Could not find XML element %s' % name)
   1905         else:
   1906             return None
   1907     return n
   1908 
   1909 
   1910 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
   1911     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
   1912     if n is None or n == default:
   1913         return n
   1914     if n.text is None:
   1915         if default is not NO_DEFAULT:
   1916             return default
   1917         elif fatal:
   1918             name = xpath if name is None else name
   1919             raise ExtractorError('Could not find XML element\'s text %s' % name)
   1920         else:
   1921             return None
   1922     return n.text
   1923 
   1924 
   1925 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
   1926     n = find_xpath_attr(node, xpath, key)
   1927     if n is None:
   1928         if default is not NO_DEFAULT:
   1929             return default
   1930         elif fatal:
   1931             name = '%s[@%s]' % (xpath, key) if name is None else name
   1932             raise ExtractorError('Could not find XML attribute %s' % name)
   1933         else:
   1934             return None
   1935     return n.attrib[key]
   1936 
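# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# the xpath_* helpers wrap ElementTree lookups and either return a default,
# return None, or raise ExtractorError when fatal=True. The XML is made up.
def _xpath_helpers_example():
    doc = compat_etree_fromstring(
        '<root><media url="http://example.com/v.mp4">title</media></root>')
    media = xpath_element(doc, './media')                    # the <media> element
    title = xpath_text(doc, './media', fatal=True)           # 'title', or ExtractorError if missing
    url = xpath_attr(doc, './media', 'url', default=None)    # 'http://example.com/v.mp4'
    return media, title, url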
   1937 
   1938 def get_element_by_id(id, html):
   1939     """Return the content of the tag with the specified ID in the passed HTML document"""
   1940     return get_element_by_attribute('id', id, html)
   1941 
   1942 
   1943 def get_element_by_class(class_name, html):
   1944     """Return the content of the first tag with the specified class in the passed HTML document"""
   1945     retval = get_elements_by_class(class_name, html)
   1946     return retval[0] if retval else None
   1947 
   1948 
   1949 def get_element_by_attribute(attribute, value, html, escape_value=True):
   1950     retval = get_elements_by_attribute(attribute, value, html, escape_value)
   1951     return retval[0] if retval else None
   1952 
   1953 
   1954 def get_elements_by_class(class_name, html):
   1955     """Return the content of all tags with the specified class in the passed HTML document as a list"""
   1956     return get_elements_by_attribute(
   1957         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
   1958         html, escape_value=False)
   1959 
   1960 
   1961 def get_elements_by_attribute(attribute, value, html, escape_value=True):
   1962     """Return the content of all tags with the specified attribute in the passed HTML document, as a list"""
   1963 
   1964     value = re.escape(value) if escape_value else value
   1965 
   1966     retlist = []
   1967     for m in re.finditer(r'''(?xs)
   1968         <([a-zA-Z0-9:._-]+)
   1969          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
   1970          \s+%s=['"]?%s['"]?
   1971          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
   1972         \s*>
   1973         (?P<content>.*?)
   1974         </\1>
   1975     ''' % (re.escape(attribute), value), html):
   1976         res = m.group('content')
   1977 
   1978         if res.startswith('"') or res.startswith("'"):
   1979             res = res[1:-1]
   1980 
   1981         retlist.append(unescapeHTML(res))
   1982 
   1983     return retlist
   1984 
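# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# the get_element(s)_by_* helpers run a regex over raw HTML rather than a real
# parser and return the unescaped inner content of matching tags. The HTML is
# made up; expected values in comments are what the regexes should produce.
def _get_elements_example():
    html = '<div class="title main">Foo &amp; Bar</div><div class="title">Baz</div>'
    first = get_element_by_class('title', html)     # 'Foo & Bar' (class list is searched)
    every = get_elements_by_class('title', html)    # ['Foo & Bar', 'Baz']
    exact = get_element_by_attribute('class', 'title', html)  # exact value match only: 'Baz'
    return first, every, exact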
   1985 
   1986 class HTMLAttributeParser(compat_HTMLParser):
   1987     """Trivial HTML parser to gather the attributes for a single element"""
   1988     def __init__(self):
   1989         self.attrs = {}
   1990         compat_HTMLParser.__init__(self)
   1991 
   1992     def handle_starttag(self, tag, attrs):
   1993         self.attrs = dict(attrs)
   1994 
   1995 
   1996 def extract_attributes(html_element):
   1997     """Given a string for an HTML element such as
   1998     <el
   1999          a="foo" B="bar" c="&#98;az" d=boz
   2000          empty= noval entity="&amp;"
   2001          sq='"' dq="'"
   2002     >
   2003     Decode and return a dictionary of attributes.
   2004     {
   2005         'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
   2006         'empty': '', 'noval': None, 'entity': '&',
   2007         'sq': '"', 'dq': '\''
   2008     }.
   2009     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
   2010     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
   2011     """
   2012     parser = HTMLAttributeParser()
   2013     try:
   2014         parser.feed(html_element)
   2015         parser.close()
   2016     # Older Python may throw HTMLParseError in case of malformed HTML
   2017     except compat_HTMLParseError:
   2018         pass
   2019     return parser.attrs
   2020 
   2021 
   2022 def clean_html(html):
   2023     """Clean an HTML snippet into a readable string"""
   2024 
   2025     if html is None:  # Convenience for sanitizing descriptions etc.
   2026         return html
   2027 
   2028     # Newline vs <br />
   2029     html = html.replace('\n', ' ')
   2030     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
   2031     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
   2032     # Strip html tags
   2033     html = re.sub('<.*?>', '', html)
   2034     # Replace html entities
   2035     html = unescapeHTML(html)
   2036     return html.strip()
   2037 
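# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# clean_html() turns <br>/<p> boundaries into newlines, strips the remaining
# tags and unescapes entities:
def _clean_html_example():
    return clean_html('<p>First &amp; second line<br/>third line</p>')
    # expected: 'First & second line\nthird line'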
   2038 
   2039 def sanitize_open(filename, open_mode):
   2040     """Try to open the given filename, and slightly tweak it if this fails.
   2041 
   2042     Attempts to open the given filename. If this fails, it tries to change
   2043     the filename slightly, step by step, until it's either able to open it
   2044     or it fails and raises a final exception, like the standard open()
   2045     function.
   2046 
   2047     It returns the tuple (stream, definitive_file_name).
   2048     """
   2049     try:
   2050         if filename == '-':
   2051             if sys.platform == 'win32':
   2052                 import msvcrt
   2053                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
   2054             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
   2055         stream = open(encodeFilename(filename), open_mode)
   2056         return (stream, filename)
   2057     except (IOError, OSError) as err:
   2058         if err.errno in (errno.EACCES,):
   2059             raise
   2060 
   2061         # In case of error, try to remove win32 forbidden chars
   2062         alt_filename = sanitize_path(filename)
   2063         if alt_filename == filename:
   2064             raise
   2065         else:
   2066             # An exception here should be caught in the caller
   2067             stream = open(encodeFilename(alt_filename), open_mode)
   2068             return (stream, alt_filename)
   2069 
   2070 
   2071 def timeconvert(timestr):
   2072     """Convert RFC 2822 defined time string into system timestamp"""
   2073     timestamp = None
   2074     timetuple = email.utils.parsedate_tz(timestr)
   2075     if timetuple is not None:
   2076         timestamp = email.utils.mktime_tz(timetuple)
   2077     return timestamp
   2078 
   2079 
   2080 def sanitize_filename(s, restricted=False, is_id=False):
   2081     """Sanitizes a string so it can be used as part of a filename.
   2082     If restricted is set, use a stricter subset of allowed characters.
   2083     Set is_id if this is not an arbitrary string, but an ID that should be kept
   2084     if possible.
   2085     """
   2086     def replace_insane(char):
   2087         if restricted and char in ACCENT_CHARS:
   2088             return ACCENT_CHARS[char]
   2089         if char == '?' or ord(char) < 32 or ord(char) == 127:
   2090             return ''
   2091         elif char == '"':
   2092             return '' if restricted else '\''
   2093         elif char == ':':
   2094             return '_-' if restricted else ' -'
   2095         elif char in '\\/|*<>':
   2096             return '_'
   2097         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
   2098             return '_'
   2099         if restricted and ord(char) > 127:
   2100             return '_'
   2101         return char
   2102 
   2103     # Handle timestamps
   2104     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
   2105     result = ''.join(map(replace_insane, s))
   2106     if not is_id:
   2107         while '__' in result:
   2108             result = result.replace('__', '_')
   2109         result = result.strip('_')
   2110         # Common case of "Foreign band name - English song title"
   2111         if restricted and result.startswith('-_'):
   2112             result = result[2:]
   2113         if result.startswith('-'):
   2114             result = '_' + result[len('-'):]
   2115         result = result.lstrip('.')
   2116         if not result:
   2117             result = '_'
   2118     return result
   2119 
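# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# sanitize_filename() replaces characters that are unsafe in filenames; with
# restricted=True it also folds accents and whitespace. The strings are made
# up and the expected results in comments are approximate.
def _sanitize_filename_example():
    a = sanitize_filename('Artist: Song? 12:34')              # e.g. 'Artist - Song 12_34'
    b = sanitize_filename('Artist: Söng?', restricted=True)   # e.g. 'Artist_-_Song'
    return a, b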
   2120 
   2121 def sanitize_path(s):
   2122     """Sanitizes and normalizes path on Windows"""
   2123     if sys.platform != 'win32':
   2124         return s
   2125     drive_or_unc, _ = os.path.splitdrive(s)
   2126     if sys.version_info < (2, 7) and not drive_or_unc:
   2127         drive_or_unc, _ = os.path.splitunc(s)
   2128     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
   2129     if drive_or_unc:
   2130         norm_path.pop(0)
   2131     sanitized_path = [
   2132         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
   2133         for path_part in norm_path]
   2134     if drive_or_unc:
   2135         sanitized_path.insert(0, drive_or_unc + os.path.sep)
   2136     return os.path.join(*sanitized_path)
   2137 
   2138 
   2139 def sanitize_url(url):
   2140     # Prepend protocol-less URLs with the `http:` scheme to reduce the
   2141     # number of unwanted failures due to a missing protocol
   2142     if url.startswith('//'):
   2143         return 'http:%s' % url
   2144     # Fix some common typos seen so far
   2145     COMMON_TYPOS = (
   2146         # https://github.com/ytdl-org/youtube-dl/issues/15649
   2147         (r'^httpss://', r'https://'),
   2148         # https://bx1.be/lives/direct-tv/
   2149         (r'^rmtp([es]?)://', r'rtmp\1://'),
   2150     )
   2151     for mistake, fixup in COMMON_TYPOS:
   2152         if re.match(mistake, url):
   2153             return re.sub(mistake, fixup, url)
   2154     return url
   2155 
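# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# sanitize_url() upgrades protocol-relative URLs to http: and fixes a couple
# of common scheme typos. The URLs below are made up.
def _sanitize_url_example():
    a = sanitize_url('//cdn.example.com/video.mp4')      # 'http://cdn.example.com/video.mp4'
    b = sanitize_url('httpss://example.com/watch?v=1')    # 'https://example.com/watch?v=1'
    c = sanitize_url('rmtpe://media.example.com/live')    # 'rtmpe://media.example.com/live'
    return a, b, c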
   2156 
   2157 def sanitized_Request(url, *args, **kwargs):
   2158     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
   2159 
   2160 
   2161 def expand_path(s):
   2162     """Expand shell variables and ~"""
   2163     return os.path.expandvars(compat_expanduser(s))
   2164 
   2165 
   2166 def orderedSet(iterable):
   2167     """ Remove all duplicates from the input iterable, preserving order """
   2168     res = []
   2169     for el in iterable:
   2170         if el not in res:
   2171             res.append(el)
   2172     return res
   2173 
   2174 
   2175 def _htmlentity_transform(entity_with_semicolon):
   2176     """Transforms an HTML entity to a character."""
   2177     entity = entity_with_semicolon[:-1]
   2178 
   2179     # Known non-numeric HTML entity
   2180     if entity in compat_html_entities.name2codepoint:
   2181         return compat_chr(compat_html_entities.name2codepoint[entity])
   2182 
   2183     # TODO: HTML5 allows entities without a semicolon. For example,
   2184     # '&Eacuteric' should be decoded as 'Éric'.
   2185     if entity_with_semicolon in compat_html_entities_html5:
   2186         return compat_html_entities_html5[entity_with_semicolon]
   2187 
   2188     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
   2189     if mobj is not None:
   2190         numstr = mobj.group(1)
   2191         if numstr.startswith('x'):
   2192             base = 16
   2193             numstr = '0%s' % numstr
   2194         else:
   2195             base = 10
   2196         # See https://github.com/ytdl-org/youtube-dl/issues/7518
   2197         try:
   2198             return compat_chr(int(numstr, base))
   2199         except ValueError:
   2200             pass
   2201 
   2202     # Unknown entity in name, return its literal representation
   2203     return '&%s;' % entity
   2204 
   2205 
   2206 def unescapeHTML(s):
   2207     if s is None:
   2208         return None
   2209     assert type(s) == compat_str
   2210 
   2211     return re.sub(
   2212         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
   2213 
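# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# unescapeHTML() resolves named, decimal and hexadecimal entities and leaves
# unknown ones untouched:
def _unescapeHTML_example():
    return unescapeHTML('Tom &amp; Jerry &#169; &#x2013; &bogus;')
    # expected: 'Tom & Jerry © – &bogus;'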
   2214 
   2215 def get_subprocess_encoding():
   2216     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
   2217         # For subprocess calls, encode with locale encoding
   2218         # Refer to http://stackoverflow.com/a/9951851/35070
   2219         encoding = preferredencoding()
   2220     else:
   2221         encoding = sys.getfilesystemencoding()
   2222     if encoding is None:
   2223         encoding = 'utf-8'
   2224     return encoding
   2225 
   2226 
   2227 def encodeFilename(s, for_subprocess=False):
   2228     """
   2229     @param s The name of the file
   2230     """
   2231 
   2232     assert type(s) == compat_str
   2233 
   2234     # Python 3 has a Unicode API
   2235     if sys.version_info >= (3, 0):
   2236         return s
   2237 
   2238     # Pass '' directly to use Unicode APIs on Windows 2000 and up
   2239     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
   2240     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
   2241     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
   2242         return s
   2243 
   2244     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
   2245     if sys.platform.startswith('java'):
   2246         return s
   2247 
   2248     return s.encode(get_subprocess_encoding(), 'ignore')
   2249 
   2250 
   2251 def decodeFilename(b, for_subprocess=False):
   2252 
   2253     if sys.version_info >= (3, 0):
   2254         return b
   2255 
   2256     if not isinstance(b, bytes):
   2257         return b
   2258 
   2259     return b.decode(get_subprocess_encoding(), 'ignore')
   2260 
   2261 
   2262 def encodeArgument(s):
   2263     if not isinstance(s, compat_str):
   2264         # Legacy code that uses byte strings
   2265         # Uncomment the following line after fixing all post processors
   2266         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
   2267         s = s.decode('ascii')
   2268     return encodeFilename(s, True)
   2269 
   2270 
   2271 def decodeArgument(b):
   2272     return decodeFilename(b, True)
   2273 
   2274 
   2275 def decodeOption(optval):
   2276     if optval is None:
   2277         return optval
   2278     if isinstance(optval, bytes):
   2279         optval = optval.decode(preferredencoding())
   2280 
   2281     assert isinstance(optval, compat_str)
   2282     return optval
   2283 
   2284 
   2285 def formatSeconds(secs):
   2286     if secs > 3600:
   2287         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
   2288     elif secs > 60:
   2289         return '%d:%02d' % (secs // 60, secs % 60)
   2290     else:
   2291         return '%d' % secs
   2292 
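# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# formatSeconds() picks H:MM:SS, M:SS or plain seconds depending on magnitude:
def _formatSeconds_example():
    return formatSeconds(3661), formatSeconds(75), formatSeconds(45)
    # expected: ('1:01:01', '1:15', '45')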
   2293 
   2294 def make_HTTPS_handler(params, **kwargs):
   2295     opts_no_check_certificate = params.get('nocheckcertificate', False)
   2296     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
   2297         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
   2298         if opts_no_check_certificate:
   2299             context.check_hostname = False
   2300             context.verify_mode = ssl.CERT_NONE
   2301         try:
   2302             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
   2303         except TypeError:
   2304             # Python 2.7.8
   2305             # (create_default_context present but HTTPSHandler has no context=)
   2306             pass
   2307 
   2308     if sys.version_info < (3, 2):
   2309         return YoutubeDLHTTPSHandler(params, **kwargs)
   2310     else:  # Python < 3.4
   2311         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
   2312         context.verify_mode = (ssl.CERT_NONE
   2313                                if opts_no_check_certificate
   2314                                else ssl.CERT_REQUIRED)
   2315         context.set_default_verify_paths()
   2316         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
   2317 
   2318 
   2319 def bug_reports_message():
   2320     if ytdl_is_updateable():
   2321         update_cmd = 'type  youtube-dl -U  to update'
   2322     else:
   2323         update_cmd = 'see  https://yt-dl.org/update  on how to update'
   2324     msg = '; please report this issue on https://yt-dl.org/bug .'
   2325     msg += ' Make sure you are using the latest version; %s.' % update_cmd
   2326     msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
   2327     return msg
   2328 
   2329 
   2330 class YoutubeDLError(Exception):
   2331     """Base exception for YoutubeDL errors."""
   2332     pass
   2333 
   2334 
   2335 class ExtractorError(YoutubeDLError):
   2336     """Error during info extraction."""
   2337 
   2338     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
   2339         """ tb, if given, is the original traceback (so that it can be printed out).
   2340         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
   2341         """
   2342 
   2343         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
   2344             expected = True
   2345         if video_id is not None:
   2346             msg = video_id + ': ' + msg
   2347         if cause:
   2348             msg += ' (caused by %r)' % cause
   2349         if not expected:
   2350             msg += bug_reports_message()
   2351         super(ExtractorError, self).__init__(msg)
   2352 
   2353         self.traceback = tb
   2354         self.exc_info = sys.exc_info()  # preserve original exception
   2355         self.cause = cause
   2356         self.video_id = video_id
   2357 
   2358     def format_traceback(self):
   2359         if self.traceback is None:
   2360             return None
   2361         return ''.join(traceback.format_tb(self.traceback))
   2362 
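# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# expected=True marks an error as a normal, user-facing condition, so the
# "please report this issue" boilerplate is not appended; video_id, if given,
# is prefixed to the message. The id below is made up.
def _extractor_error_example():
    raise ExtractorError('This video is private', expected=True, video_id='abc123')
    # str(exc) would be: 'abc123: This video is private'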
   2363 
   2364 class UnsupportedError(ExtractorError):
   2365     def __init__(self, url):
   2366         super(UnsupportedError, self).__init__(
   2367             'Unsupported URL: %s' % url, expected=True)
   2368         self.url = url
   2369 
   2370 
   2371 class RegexNotFoundError(ExtractorError):
   2372     """Error when a regex didn't match"""
   2373     pass
   2374 
   2375 
   2376 class GeoRestrictedError(ExtractorError):
   2377     """Geographic restriction Error exception.
   2378 
   2379     This exception may be thrown when a video is not available from your
   2380     geographic location due to geographic restrictions imposed by a website.
   2381     """
   2382     def __init__(self, msg, countries=None):
   2383         super(GeoRestrictedError, self).__init__(msg, expected=True)
   2384         self.msg = msg
   2385         self.countries = countries
   2386 
   2387 
   2388 class DownloadError(YoutubeDLError):
   2389     """Download Error exception.
   2390 
   2391     This exception may be thrown by FileDownloader objects if they are not
   2392     configured to continue on errors. They will contain the appropriate
   2393     error message.
   2394     """
   2395 
   2396     def __init__(self, msg, exc_info=None):
   2397         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
   2398         super(DownloadError, self).__init__(msg)
   2399         self.exc_info = exc_info
   2400 
   2401 
   2402 class SameFileError(YoutubeDLError):
   2403     """Same File exception.
   2404 
   2405     This exception will be thrown by FileDownloader objects if they detect
   2406     multiple files would have to be downloaded to the same file on disk.
   2407     """
   2408     pass
   2409 
   2410 
   2411 class PostProcessingError(YoutubeDLError):
   2412     """Post Processing exception.
   2413 
   2414     This exception may be raised by PostProcessor's .run() method to
   2415     indicate an error in the postprocessing task.
   2416     """
   2417 
   2418     def __init__(self, msg):
   2419         super(PostProcessingError, self).__init__(msg)
   2420         self.msg = msg
   2421 
   2422 
   2423 class MaxDownloadsReached(YoutubeDLError):
   2424     """ --max-downloads limit has been reached. """
   2425     pass
   2426 
   2427 
   2428 class UnavailableVideoError(YoutubeDLError):
   2429     """Unavailable Format exception.
   2430 
   2431     This exception will be thrown when a video is requested
   2432     in a format that is not available for that video.
   2433     """
   2434     pass
   2435 
   2436 
   2437 class ContentTooShortError(YoutubeDLError):
   2438     """Content Too Short exception.
   2439 
   2440     This exception may be raised by FileDownloader objects when a file they
   2441     download is too small for what the server announced first, indicating
   2442     the connection was probably interrupted.
   2443     """
   2444 
   2445     def __init__(self, downloaded, expected):
   2446         super(ContentTooShortError, self).__init__(
   2447             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
   2448         )
   2449         # Both in bytes
   2450         self.downloaded = downloaded
   2451         self.expected = expected
   2452 
   2453 
   2454 class XAttrMetadataError(YoutubeDLError):
   2455     def __init__(self, code=None, msg='Unknown error'):
   2456         super(XAttrMetadataError, self).__init__(msg)
   2457         self.code = code
   2458         self.msg = msg
   2459 
   2460         # Parsing code and msg
   2461         if (self.code in (errno.ENOSPC, errno.EDQUOT)
   2462                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
   2463             self.reason = 'NO_SPACE'
   2464         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
   2465             self.reason = 'VALUE_TOO_LONG'
   2466         else:
   2467             self.reason = 'NOT_SUPPORTED'
   2468 
   2469 
   2470 class XAttrUnavailableError(YoutubeDLError):
   2471     pass
   2472 
   2473 
   2474 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
   2475     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
   2476     # expected HTTP responses to meet HTTP/1.0 or later (see also
   2477     # https://github.com/ytdl-org/youtube-dl/issues/6727)
   2478     if sys.version_info < (3, 0):
   2479         kwargs['strict'] = True
   2480     hc = http_class(*args, **compat_kwargs(kwargs))
   2481     source_address = ydl_handler._params.get('source_address')
   2482 
   2483     if source_address is not None:
   2484         # This works around socket's _create_connection(), which tries all
   2485         # address data from getaddrinfo(), including IPv6. Here the result of
   2486         # getaddrinfo() is filtered based on the source_address value.
   2487         # This is based on the CPython socket.create_connection() function.
   2488         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
   2489         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
   2490             host, port = address
   2491             err = None
   2492             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
   2493             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
   2494             ip_addrs = [addr for addr in addrs if addr[0] == af]
   2495             if addrs and not ip_addrs:
   2496                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
   2497                 raise socket.error(
   2498                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
   2499                     % (ip_version, source_address[0]))
   2500             for res in ip_addrs:
   2501                 af, socktype, proto, canonname, sa = res
   2502                 sock = None
   2503                 try:
   2504                     sock = socket.socket(af, socktype, proto)
   2505                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
   2506                         sock.settimeout(timeout)
   2507                     sock.bind(source_address)
   2508                     sock.connect(sa)
   2509                     err = None  # Explicitly break reference cycle
   2510                     return sock
   2511                 except socket.error as _:
   2512                     err = _
   2513                     if sock is not None:
   2514                         sock.close()
   2515             if err is not None:
   2516                 raise err
   2517             else:
   2518                 raise socket.error('getaddrinfo returns an empty list')
   2519         if hasattr(hc, '_create_connection'):
   2520             hc._create_connection = _create_connection
   2521         sa = (source_address, 0)
   2522         if hasattr(hc, 'source_address'):  # Python 2.7+
   2523             hc.source_address = sa
   2524         else:  # Python 2.6
   2525             def _hc_connect(self, *args, **kwargs):
   2526                 sock = _create_connection(
   2527                     (self.host, self.port), self.timeout, sa)
   2528                 if is_https:
   2529                     self.sock = ssl.wrap_socket(
   2530                         sock, self.key_file, self.cert_file,
   2531                         ssl_version=ssl.PROTOCOL_TLSv1)
   2532                 else:
   2533                     self.sock = sock
   2534             hc.connect = functools.partial(_hc_connect, hc)
   2535 
   2536     return hc
   2537 
   2538 
   2539 def handle_youtubedl_headers(headers):
   2540     filtered_headers = headers
   2541 
   2542     if 'Youtubedl-no-compression' in filtered_headers:
   2543         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
   2544         del filtered_headers['Youtubedl-no-compression']
   2545 
   2546     return filtered_headers
   2547 
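# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# the internal 'Youtubedl-no-compression' marker is consumed here; it also
# drops any Accept-Encoding header so the real request goes out uncompressed:
def _handle_youtubedl_headers_example():
    headers = {'User-Agent': 'UA', 'Accept-Encoding': 'gzip, deflate',
               'Youtubedl-no-compression': 'True'}
    return handle_youtubedl_headers(headers)
    # expected: {'User-Agent': 'UA'}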
   2548 
   2549 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
   2550     """Handler for HTTP requests and responses.
   2551 
   2552     This class, when installed with an OpenerDirector, automatically adds
   2553     the standard headers to every HTTP request and handles gzipped and
   2554     deflated responses from web servers. If compression is to be avoided in
   2555     a particular request, the original request in the program code only has
   2556     to include the HTTP header "Youtubedl-no-compression", which will be
   2557     removed before making the real request.
   2558 
   2559     Part of this code was copied from:
   2560 
   2561     http://techknack.net/python-urllib2-handlers/
   2562 
   2563     Andrew Rowls, the author of that code, agreed to release it to the
   2564     public domain.
   2565     """
   2566 
   2567     def __init__(self, params, *args, **kwargs):
   2568         compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
   2569         self._params = params
   2570 
   2571     def http_open(self, req):
   2572         conn_class = compat_http_client.HTTPConnection
   2573 
   2574         socks_proxy = req.headers.get('Ytdl-socks-proxy')
   2575         if socks_proxy:
   2576             conn_class = make_socks_conn_class(conn_class, socks_proxy)
   2577             del req.headers['Ytdl-socks-proxy']
   2578 
   2579         return self.do_open(functools.partial(
   2580             _create_http_connection, self, conn_class, False),
   2581             req)
   2582 
   2583     @staticmethod
   2584     def deflate(data):
   2585         try:
   2586             return zlib.decompress(data, -zlib.MAX_WBITS)
   2587         except zlib.error:
   2588             return zlib.decompress(data)
   2589 
   2590     def http_request(self, req):
   2591         # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is
   2592         # not always respected by websites, which sometimes give out URLs with non-percent-encoded
   2593         # non-ASCII characters (see telemb.py, ard.py [#3412])
   2594         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
   2595         # To work around the aforementioned issue we replace the request's original URL with
   2596         # a percent-encoded one
   2597         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
   2598         # the code of this workaround has been moved here from YoutubeDL.urlopen()
   2599         url = req.get_full_url()
   2600         url_escaped = escape_url(url)
   2601 
   2602         # Substitute the URL if it changed after escaping
   2603         if url != url_escaped:
   2604             req = update_Request(req, url=url_escaped)
   2605 
   2606         for h, v in std_headers.items():
   2607             # Capitalization is needed because of Python bug 2275: http://bugs.python.org/issue2275
   2608             # (urllib capitalizes the dict keys because of this bug)
   2609             if h.capitalize() not in req.headers:
   2610                 req.add_header(h, v)
   2611 
   2612         req.headers = handle_youtubedl_headers(req.headers)
   2613 
   2614         if sys.version_info < (2, 7) and '#' in req.get_full_url():
   2615             # Python 2.6 is brain-dead when it comes to fragments
   2616             req._Request__original = req._Request__original.partition('#')[0]
   2617             req._Request__r_type = req._Request__r_type.partition('#')[0]
   2618 
   2619         return req
   2620 
   2621     def http_response(self, req, resp):
   2622         old_resp = resp
   2623         # gzip
   2624         if resp.headers.get('Content-encoding', '') == 'gzip':
   2625             content = resp.read()
   2626             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
   2627             try:
   2628                 uncompressed = io.BytesIO(gz.read())
   2629             except IOError as original_ioerror:
   2630                 # There may be junk at the end of the file
   2631                 # See http://stackoverflow.com/q/4928560/35070 for details
   2632                 for i in range(1, 1024):
   2633                     try:
   2634                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
   2635                         uncompressed = io.BytesIO(gz.read())
   2636                     except IOError:
   2637                         continue
   2638                     break
   2639                 else:
   2640                     raise original_ioerror
   2641             resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
   2642             resp.msg = old_resp.msg
   2643             del resp.headers['Content-encoding']
   2644         # deflate
   2645         if resp.headers.get('Content-encoding', '') == 'deflate':
   2646             gz = io.BytesIO(self.deflate(resp.read()))
   2647             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
   2648             resp.msg = old_resp.msg
   2649             del resp.headers['Content-encoding']
   2650         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
   2651         # https://github.com/ytdl-org/youtube-dl/issues/6457).
   2652         if 300 <= resp.code < 400:
   2653             location = resp.headers.get('Location')
   2654             if location:
   2655                 # Per RFC 2616 the default charset is iso-8859-1, which is respected by Python 3
   2656                 if sys.version_info >= (3, 0):
   2657                     location = location.encode('iso-8859-1').decode('utf-8')
   2658                 else:
   2659                     location = location.decode('utf-8')
   2660                 location_escaped = escape_url(location)
   2661                 if location != location_escaped:
   2662                     del resp.headers['Location']
   2663                     if sys.version_info < (3, 0):
   2664                         location_escaped = location_escaped.encode('utf-8')
   2665                     resp.headers['Location'] = location_escaped
   2666         return resp
   2667 
   2668     https_request = http_request
   2669     https_response = http_response
   2670 
   2671 
   2672 def make_socks_conn_class(base_class, socks_proxy):
   2673     assert issubclass(base_class, (
   2674         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
   2675 
   2676     url_components = compat_urlparse.urlparse(socks_proxy)
   2677     if url_components.scheme.lower() == 'socks5':
   2678         socks_type = ProxyType.SOCKS5
   2679     elif url_components.scheme.lower() in ('socks', 'socks4'):
   2680         socks_type = ProxyType.SOCKS4
   2681     elif url_components.scheme.lower() == 'socks4a':
   2682         socks_type = ProxyType.SOCKS4A
   2683 
   2684     def unquote_if_non_empty(s):
   2685         if not s:
   2686             return s
   2687         return compat_urllib_parse_unquote_plus(s)
   2688 
   2689     proxy_args = (
   2690         socks_type,
   2691         url_components.hostname, url_components.port or 1080,
   2692         True,  # Remote DNS
   2693         unquote_if_non_empty(url_components.username),
   2694         unquote_if_non_empty(url_components.password),
   2695     )
   2696 
   2697     class SocksConnection(base_class):
   2698         def connect(self):
   2699             self.sock = sockssocket()
   2700             self.sock.setproxy(*proxy_args)
   2701             if type(self.timeout) in (int, float):
   2702                 self.sock.settimeout(self.timeout)
   2703             self.sock.connect((self.host, self.port))
   2704 
   2705             if isinstance(self, compat_http_client.HTTPSConnection):
   2706                 if hasattr(self, '_context'):  # Python > 2.6
   2707                     self.sock = self._context.wrap_socket(
   2708                         self.sock, server_hostname=self.host)
   2709                 else:
   2710                     self.sock = ssl.wrap_socket(self.sock)
   2711 
   2712     return SocksConnection
   2713 
   2714 
   2715 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
   2716     def __init__(self, params, https_conn_class=None, *args, **kwargs):
   2717         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
   2718         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
   2719         self._params = params
   2720 
   2721     def https_open(self, req):
   2722         kwargs = {}
   2723         conn_class = self._https_conn_class
   2724 
   2725         if hasattr(self, '_context'):  # python > 2.6
   2726             kwargs['context'] = self._context
   2727         if hasattr(self, '_check_hostname'):  # python 3.x
   2728             kwargs['check_hostname'] = self._check_hostname
   2729 
   2730         socks_proxy = req.headers.get('Ytdl-socks-proxy')
   2731         if socks_proxy:
   2732             conn_class = make_socks_conn_class(conn_class, socks_proxy)
   2733             del req.headers['Ytdl-socks-proxy']
   2734 
   2735         return self.do_open(functools.partial(
   2736             _create_http_connection, self, conn_class, True),
   2737             req, **kwargs)
   2738 
   2739 
   2740 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
   2741     """
   2742     See [1] for cookie file format.
   2743 
   2744     1. https://curl.haxx.se/docs/http-cookies.html
   2745     """
   2746     _HTTPONLY_PREFIX = '#HttpOnly_'
   2747     _ENTRY_LEN = 7
   2748     _HEADER = '''# Netscape HTTP Cookie File
   2749 # This file is generated by youtube-dl.  Do not edit.
   2750 
   2751 '''
   2752     _CookieFileEntry = collections.namedtuple(
   2753         'CookieFileEntry',
   2754         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
   2755 
   2756     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
   2757         """
   2758         Save cookies to a file.
   2759 
   2760         Most of the code is taken from CPython 3.8 and slightly adapted
   2761         to support cookie files with UTF-8 in both python 2 and 3.
   2762         """
   2763         if filename is None:
   2764             if self.filename is not None:
   2765                 filename = self.filename
   2766             else:
   2767                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
   2768 
   2769         # Store session cookies with `expires` set to 0 instead of an empty
   2770         # string
   2771         for cookie in self:
   2772             if cookie.expires is None:
   2773                 cookie.expires = 0
   2774 
   2775         with io.open(filename, 'w', encoding='utf-8') as f:
   2776             f.write(self._HEADER)
   2777             now = time.time()
   2778             for cookie in self:
   2779                 if not ignore_discard and cookie.discard:
   2780                     continue
   2781                 if not ignore_expires and cookie.is_expired(now):
   2782                     continue
   2783                 if cookie.secure:
   2784                     secure = 'TRUE'
   2785                 else:
   2786                     secure = 'FALSE'
   2787                 if cookie.domain.startswith('.'):
   2788                     initial_dot = 'TRUE'
   2789                 else:
   2790                     initial_dot = 'FALSE'
   2791                 if cookie.expires is not None:
   2792                     expires = compat_str(cookie.expires)
   2793                 else:
   2794                     expires = ''
   2795                 if cookie.value is None:
   2796                     # cookies.txt regards 'Set-Cookie: foo' as a cookie
   2797                     # with no name, whereas http.cookiejar regards it as a
   2798                     # cookie with no value.
   2799                     name = ''
   2800                     value = cookie.name
   2801                 else:
   2802                     name = cookie.name
   2803                     value = cookie.value
   2804                 f.write(
   2805                     '\t'.join([cookie.domain, initial_dot, cookie.path,
   2806                                secure, expires, name, value]) + '\n')
   2807 
   2808     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
   2809         """Load cookies from a file."""
   2810         if filename is None:
   2811             if self.filename is not None:
   2812                 filename = self.filename
   2813             else:
   2814                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
   2815 
   2816         def prepare_line(line):
   2817             if line.startswith(self._HTTPONLY_PREFIX):
   2818                 line = line[len(self._HTTPONLY_PREFIX):]
   2819             # comments and empty lines are fine
   2820             if line.startswith('#') or not line.strip():
   2821                 return line
   2822             cookie_list = line.split('\t')
   2823             if len(cookie_list) != self._ENTRY_LEN:
   2824                 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
   2825             cookie = self._CookieFileEntry(*cookie_list)
   2826             if cookie.expires_at and not cookie.expires_at.isdigit():
   2827                 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
   2828             return line
   2829 
   2830         cf = io.StringIO()
   2831         with io.open(filename, encoding='utf-8') as f:
   2832             for line in f:
   2833                 try:
   2834                     cf.write(prepare_line(line))
   2835                 except compat_cookiejar.LoadError as e:
   2836                     write_string(
   2837                         'WARNING: skipping cookie file entry due to %s: %r\n'
   2838                         % (e, line), sys.stderr)
   2839                     continue
   2840         cf.seek(0)
   2841         self._really_load(cf, filename, ignore_discard, ignore_expires)
   2842         # Session cookies are denoted by either `expires` field set to
   2843         # an empty string or 0. MozillaCookieJar only recognizes the former
   2844         # (see [1]), so we need to force the latter to be recognized as session
   2845         # cookies on our own.
   2846         # Session cookies may be important for cookie-based authentication;
   2847         # e.g. when a user does not check the 'Remember me' box while logging
   2848         # in on a site, some important cookies are stored as session cookies,
   2849         # and failing to recognize them would result in a failed login.
   2850         # 1. https://bugs.python.org/issue17164
   2851         for cookie in self:
   2852             # Treat `expires=0` cookies as session cookies
   2853             if cookie.expires == 0:
   2854                 cookie.expires = None
   2855                 cookie.discard = True
   2856 
   2857 
   2858 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
   2859     def __init__(self, cookiejar=None):
   2860         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
   2861 
   2862     def http_response(self, request, response):
   2863         # Python 2 will choke on the next HTTP request in a row if there are non-ASCII
   2864         # characters in the Set-Cookie HTTP header of the last response (see
   2865         # https://github.com/ytdl-org/youtube-dl/issues/6769).
   2866         # In order to at least prevent crashing, percent-encode the Set-Cookie
   2867         # header before HTTPCookieProcessor starts processing it.
   2868         # if sys.version_info < (3, 0) and response.headers:
   2869         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
   2870         #         set_cookie = response.headers.get(set_cookie_header)
   2871         #         if set_cookie:
   2872         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
   2873         #             if set_cookie != set_cookie_escaped:
   2874         #                 del response.headers[set_cookie_header]
   2875         #                 response.headers[set_cookie_header] = set_cookie_escaped
   2876         return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
   2877 
   2878     https_request = compat_urllib_request.HTTPCookieProcessor.http_request
   2879     https_response = http_response
   2880 
   2881 
   2882 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
   2883     """YoutubeDL redirect handler
   2884 
   2885     The code is based on HTTPRedirectHandler implementation from CPython [1].
   2886 
   2887     This redirect handler solves two issues:
   2888      - ensures redirect URL is always unicode under python 2
   2889      - introduces support for experimental HTTP response status code
   2890        308 Permanent Redirect [2] used by some sites [3]
   2891 
   2892     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
   2893     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
   2894     3. https://github.com/ytdl-org/youtube-dl/issues/28768
   2895     """
   2896 
   2897     http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
   2898 
   2899     def redirect_request(self, req, fp, code, msg, headers, newurl):
   2900         """Return a Request or None in response to a redirect.
   2901 
   2902         This is called by the http_error_30x methods when a
   2903         redirection response is received.  If a redirection should
   2904         take place, return a new Request to allow http_error_30x to
   2905         perform the redirect.  Otherwise, raise HTTPError if no-one
   2906         else should try to handle this url.  Return None if you can't
   2907         but another Handler might.
   2908         """
   2909         m = req.get_method()
   2910         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
   2911                  or code in (301, 302, 303) and m == "POST")):
   2912             raise compat_HTTPError(req.full_url, code, msg, headers, fp)
   2913         # Strictly (according to RFC 2616), 301 or 302 in response to
   2914         # a POST MUST NOT cause a redirection without confirmation
   2915         # from the user (of urllib.request, in this case).  In practice,
   2916         # essentially all clients do redirect in this case, so we do
   2917         # the same.
   2918 
   2919         # On python 2 urlh.geturl() may sometimes return redirect URL
   2920         # as byte string instead of unicode. This workaround allows
   2921         # to force it always return unicode.
   2922         if sys.version_info[0] < 3:
   2923             newurl = compat_str(newurl)
   2924 
   2925         # Be conciliant with URIs containing a space.  This is mainly
   2926         # redundant with the more complete encoding done in http_error_302(),
   2927         # but it is kept for compatibility with other callers.
   2928         newurl = newurl.replace(' ', '%20')
   2929 
   2930         CONTENT_HEADERS = ("content-length", "content-type")
   2931         # NB: don't use dict comprehension for python 2.6 compatibility
   2932         newheaders = dict((k, v) for k, v in req.headers.items()
   2933                           if k.lower() not in CONTENT_HEADERS)
   2934         return compat_urllib_request.Request(
   2935             newurl, headers=newheaders, origin_req_host=req.origin_req_host,
   2936             unverifiable=True)
   2937 
   2938 
   2939 def extract_timezone(date_str):
   2940     m = re.search(
   2941         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
   2942         date_str)
   2943     if not m:
   2944         timezone = datetime.timedelta()
   2945     else:
   2946         date_str = date_str[:-len(m.group('tz'))]
   2947         if not m.group('sign'):
   2948             timezone = datetime.timedelta()
   2949         else:
   2950             sign = 1 if m.group('sign') == '+' else -1
   2951             timezone = datetime.timedelta(
   2952                 hours=sign * int(m.group('hours')),
   2953                 minutes=sign * int(m.group('minutes')))
   2954     return timezone, date_str
   2955 
   2956 
   2957 def parse_iso8601(date_str, delimiter='T', timezone=None):
   2958     """ Return a UNIX timestamp from the given date """
   2959 
   2960     if date_str is None:
   2961         return None
   2962 
   2963     date_str = re.sub(r'\.[0-9]+', '', date_str)
   2964 
   2965     if timezone is None:
   2966         timezone, date_str = extract_timezone(date_str)
   2967 
   2968     try:
   2969         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
   2970         dt = datetime.datetime.strptime(date_str, date_format) - timezone
   2971         return calendar.timegm(dt.timetuple())
   2972     except ValueError:
   2973         pass
   2974 
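# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# extract_timezone() splits a trailing Z/+HH:MM offset off the string, and
# parse_iso8601() uses it to return a UTC UNIX timestamp:
def _parse_iso8601_example():
    tz, rest = extract_timezone('2014-01-01T05:30:00+05:30')
    # tz == datetime.timedelta(hours=5, minutes=30), rest == '2014-01-01T05:30:00'
    return parse_iso8601('2014-01-01T00:00:00Z')
    # expected: 1388534400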
   2975 
   2976 def date_formats(day_first=True):
   2977     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
   2978 
   2979 
   2980 def unified_strdate(date_str, day_first=True):
   2981     """Return a string with the date in the format YYYYMMDD"""
   2982 
   2983     if date_str is None:
   2984         return None
   2985     upload_date = None
   2986     # Replace commas
   2987     date_str = date_str.replace(',', ' ')
   2988     # Remove AM/PM + timezone
   2989     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
   2990     _, date_str = extract_timezone(date_str)
   2991 
   2992     for expression in date_formats(day_first):
   2993         try:
   2994             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
   2995         except ValueError:
   2996             pass
   2997     if upload_date is None:
   2998         timetuple = email.utils.parsedate_tz(date_str)
   2999         if timetuple:
   3000             try:
   3001                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
   3002             except ValueError:
   3003                 pass
   3004     if upload_date is not None:
   3005         return compat_str(upload_date)
   3006 
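# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# unified_strdate() normalizes many human-readable date spellings to YYYYMMDD:
def _unified_strdate_example():
    a = unified_strdate('December 21, 2012')   # '20121221'
    b = unified_strdate('2012/12/21')          # '20121221'
    c = unified_strdate('8/7/2009')            # '20090708' (day_first=True by default)
    return a, b, c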
   3007 
   3008 def unified_timestamp(date_str, day_first=True):
   3009     if date_str is None:
   3010         return None
   3011 
   3012     date_str = re.sub(r'[,|]', '', date_str)
   3013 
   3014     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
   3015     timezone, date_str = extract_timezone(date_str)
   3016 
   3017     # Remove AM/PM + timezone
   3018     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
   3019 
   3020     # Remove unrecognized timezones from ISO 8601-like timestamps
   3021     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
   3022     if m:
   3023         date_str = date_str[:-len(m.group('tz'))]
   3024 
   3025     # Python only supports microseconds, so remove nanoseconds
   3026     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
   3027     if m:
   3028         date_str = m.group(1)
   3029 
   3030     for expression in date_formats(day_first):
   3031         try:
   3032             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
   3033             return calendar.timegm(dt.timetuple())
   3034         except ValueError:
   3035             pass
   3036     timetuple = email.utils.parsedate_tz(date_str)
   3037     if timetuple:
   3038         return calendar.timegm(timetuple) + pm_delta * 3600
   3039 
   3040 
   3041 def determine_ext(url, default_ext='unknown_video'):
   3042     if url is None or '.' not in url:
   3043         return default_ext
   3044     guess = url.partition('?')[0].rpartition('.')[2]
   3045     if re.match(r'^[A-Za-z0-9]+$', guess):
   3046         return guess
   3047     # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
   3048     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
   3049         return guess.rstrip('/')
   3050     else:
   3051         return default_ext
   3052 
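# Illustrative usage sketch (editorial addition, not part of upstream utils.py):
# determine_ext() guesses the extension from the URL path, tolerating query
# strings and a trailing slash, and falls back to default_ext otherwise:
def _determine_ext_example():
    a = determine_ext('http://example.com/video.mp4?download=1')   # 'mp4'
    b = determine_ext('http://example.com/foo/bar.mp4/?download')  # 'mp4'
    c = determine_ext('http://example.com/watch')                  # 'unknown_video'
    return a, b, c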
   3053 
   3054 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
   3055     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
   3056 
   3057 
   3058 def date_from_str(date_str):
   3059     """
   3060     Return a date object from a string in the format YYYYMMDD, from 'now',
   3061     'today' or 'yesterday', or from (now|today)[+-]\d+(day|week|month|year)(s)?"""
   3062     today = datetime.date.today()
   3063     if date_str in ('now', 'today'):
   3064         return today
   3065     if date_str == 'yesterday':
   3066         return today - datetime.timedelta(days=1)
   3067     match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
   3068     if match is not None:
   3069         sign = match.group('sign')
   3070         time = int(match.group('time'))
   3071         if sign == '-':
   3072             time = -time
   3073         unit = match.group('unit')
   3074         # A rough approximation: months and years are converted to 30 and 365 days respectively
   3075         if unit == 'month':
   3076             unit = 'day'
   3077             time *= 30
   3078         elif unit == 'year':
   3079             unit = 'day'
   3080             time *= 365
   3081         unit += 's'
   3082         delta = datetime.timedelta(**{unit: time})
   3083         return today + delta
   3084     return datetime.datetime.strptime(date_str, '%Y%m%d').date()
   3085 
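        # Editor's note: illustrative usage (not part of the original module):
        #   >>> date_from_str('19700101')
        #   datetime.date(1970, 1, 1)
        #   >>> date_from_str('now-1week') == datetime.date.today() - datetime.timedelta(days=7)
        #   True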
   3086 
   3087 def hyphenate_date(date_str):
   3088     """
   3089     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
   3090     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
   3091     if match is not None:
   3092         return '-'.join(match.groups())
   3093     else:
   3094         return date_str
   3095 
   3096 
   3097 class DateRange(object):
   3098     """Represents a time interval between two dates"""
   3099 
   3100     def __init__(self, start=None, end=None):
   3101         """start and end must be strings in the format accepted by date"""
   3102         if start is not None:
   3103             self.start = date_from_str(start)
   3104         else:
   3105             self.start = datetime.datetime.min.date()
   3106         if end is not None:
   3107             self.end = date_from_str(end)
   3108         else:
   3109             self.end = datetime.datetime.max.date()
   3110         if self.start > self.end:
   3111             raise ValueError('Date range: "%s", the start date must be before the end date' % self)
   3112 
   3113     @classmethod
   3114     def day(cls, day):
   3115         """Returns a range that only contains the given day"""
   3116         return cls(day, day)
   3117 
   3118     def __contains__(self, date):
   3119         """Check if the date is in the range"""
   3120         if not isinstance(date, datetime.date):
   3121             date = date_from_str(date)
   3122         return self.start <= date <= self.end
   3123 
   3124     def __str__(self):
   3125         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
   3126 
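        # Editor's note: illustrative usage (not part of the original module);
        # DateRange accepts the same strings as date_from_str():
        #   >>> '20200101' in DateRange('20191231', '20200102')
        #   True
        #   >>> '20200103' in DateRange.day('20200102')
        #   False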
   3127 
   3128 def platform_name():
   3129     """ Returns the platform name as a compat_str """
   3130     res = platform.platform()
   3131     if isinstance(res, bytes):
   3132         res = res.decode(preferredencoding())
   3133 
   3134     assert isinstance(res, compat_str)
   3135     return res
   3136 
   3137 
   3138 def _windows_write_string(s, out):
   3139     """ Returns True if the string was written using special methods,
   3140     False if it has yet to be written out."""
   3141     # Adapted from http://stackoverflow.com/a/3259271/35070
   3142 
   3143     import ctypes
   3144     import ctypes.wintypes
   3145 
   3146     WIN_OUTPUT_IDS = {
   3147         1: -11,
   3148         2: -12,
   3149     }
   3150 
   3151     try:
   3152         fileno = out.fileno()
   3153     except AttributeError:
   3154         # If the output stream doesn't have a fileno, it's virtual
   3155         return False
   3156     except io.UnsupportedOperation:
   3157         # Some strange Windows pseudo files?
   3158         return False
   3159     if fileno not in WIN_OUTPUT_IDS:
   3160         return False
   3161 
   3162     GetStdHandle = compat_ctypes_WINFUNCTYPE(
   3163         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
   3164         ('GetStdHandle', ctypes.windll.kernel32))
   3165     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
   3166 
   3167     WriteConsoleW = compat_ctypes_WINFUNCTYPE(
   3168         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
   3169         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
   3170         ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
   3171     written = ctypes.wintypes.DWORD(0)
   3172 
   3173     GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
   3174     FILE_TYPE_CHAR = 0x0002
   3175     FILE_TYPE_REMOTE = 0x8000
   3176     GetConsoleMode = compat_ctypes_WINFUNCTYPE(
   3177         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
   3178         ctypes.POINTER(ctypes.wintypes.DWORD))(
   3179         ('GetConsoleMode', ctypes.windll.kernel32))
   3180     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
   3181 
   3182     def not_a_console(handle):
   3183         if handle == INVALID_HANDLE_VALUE or handle is None:
   3184             return True
   3185         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
   3186                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
   3187 
   3188     if not_a_console(h):
   3189         return False
   3190 
   3191     def next_nonbmp_pos(s):
   3192         try:
   3193             return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
   3194         except StopIteration:
   3195             return len(s)
   3196 
   3197     while s:
   3198         count = min(next_nonbmp_pos(s), 1024)
   3199 
   3200         ret = WriteConsoleW(
   3201             h, s, count if count else 2, ctypes.byref(written), None)
   3202         if ret == 0:
   3203             raise OSError('Failed to write string')
   3204         if not count:  # We just wrote a non-BMP character
   3205             assert written.value == 2
   3206             s = s[1:]
   3207         else:
   3208             assert written.value > 0
   3209             s = s[written.value:]
   3210     return True
   3211 
   3212 
   3213 def write_string(s, out=None, encoding=None):
   3214     if out is None:
   3215         out = sys.stderr
   3216     assert type(s) == compat_str
   3217 
   3218     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
   3219         if _windows_write_string(s, out):
   3220             return
   3221 
   3222     if ('b' in getattr(out, 'mode', '')
   3223             or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
   3224         byt = s.encode(encoding or preferredencoding(), 'ignore')
   3225         out.write(byt)
   3226     elif hasattr(out, 'buffer'):
   3227         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
   3228         byt = s.encode(enc, 'ignore')
   3229         out.buffer.write(byt)
   3230     else:
   3231         out.write(s)
   3232     out.flush()
   3233 
   3234 
   3235 def bytes_to_intlist(bs):
   3236     if not bs:
   3237         return []
   3238     if isinstance(bs[0], int):  # Python 3
   3239         return list(bs)
   3240     else:
   3241         return [ord(c) for c in bs]
   3242 
   3243 
   3244 def intlist_to_bytes(xs):
   3245     if not xs:
   3246         return b''
   3247     return compat_struct_pack('%dB' % len(xs), *xs)
   3248 
   3249 
   3250 # Cross-platform file locking
   3251 if sys.platform == 'win32':
   3252     import ctypes.wintypes
   3253     import msvcrt
   3254 
   3255     class OVERLAPPED(ctypes.Structure):
   3256         _fields_ = [
   3257             ('Internal', ctypes.wintypes.LPVOID),
   3258             ('InternalHigh', ctypes.wintypes.LPVOID),
   3259             ('Offset', ctypes.wintypes.DWORD),
   3260             ('OffsetHigh', ctypes.wintypes.DWORD),
   3261             ('hEvent', ctypes.wintypes.HANDLE),
   3262         ]
   3263 
   3264     kernel32 = ctypes.windll.kernel32
   3265     LockFileEx = kernel32.LockFileEx
   3266     LockFileEx.argtypes = [
   3267         ctypes.wintypes.HANDLE,     # hFile
   3268         ctypes.wintypes.DWORD,      # dwFlags
   3269         ctypes.wintypes.DWORD,      # dwReserved
   3270         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
   3271         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
   3272         ctypes.POINTER(OVERLAPPED)  # Overlapped
   3273     ]
   3274     LockFileEx.restype = ctypes.wintypes.BOOL
   3275     UnlockFileEx = kernel32.UnlockFileEx
   3276     UnlockFileEx.argtypes = [
   3277         ctypes.wintypes.HANDLE,     # hFile
   3278         ctypes.wintypes.DWORD,      # dwReserved
   3279         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
   3280         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
   3281         ctypes.POINTER(OVERLAPPED)  # Overlapped
   3282     ]
   3283     UnlockFileEx.restype = ctypes.wintypes.BOOL
   3284     whole_low = 0xffffffff
   3285     whole_high = 0x7fffffff
   3286 
   3287     def _lock_file(f, exclusive):
   3288         overlapped = OVERLAPPED()
   3289         overlapped.Offset = 0
   3290         overlapped.OffsetHigh = 0
   3291         overlapped.hEvent = 0
   3292         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
   3293         handle = msvcrt.get_osfhandle(f.fileno())
   3294         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
   3295                           whole_low, whole_high, f._lock_file_overlapped_p):
   3296             raise OSError('Locking file failed: %r' % ctypes.FormatError())
   3297 
   3298     def _unlock_file(f):
   3299         assert f._lock_file_overlapped_p
   3300         handle = msvcrt.get_osfhandle(f.fileno())
   3301         if not UnlockFileEx(handle, 0,
   3302                             whole_low, whole_high, f._lock_file_overlapped_p):
   3303             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
   3304 
   3305 else:
   3306     # Some platforms, such as Jython, are missing fcntl
   3307     try:
   3308         import fcntl
   3309 
   3310         def _lock_file(f, exclusive):
   3311             fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
   3312 
   3313         def _unlock_file(f):
   3314             fcntl.flock(f, fcntl.LOCK_UN)
   3315     except ImportError:
   3316         UNSUPPORTED_MSG = 'file locking is not supported on this platform'
   3317 
   3318         def _lock_file(f, exclusive):
   3319             raise IOError(UNSUPPORTED_MSG)
   3320 
   3321         def _unlock_file(f):
   3322             raise IOError(UNSUPPORTED_MSG)
   3323 
   3324 
   3325 class locked_file(object):
   3326     def __init__(self, filename, mode, encoding=None):
   3327         assert mode in ['r', 'a', 'w']
   3328         self.f = io.open(filename, mode, encoding=encoding)
   3329         self.mode = mode
   3330 
   3331     def __enter__(self):
   3332         exclusive = self.mode != 'r'
   3333         try:
   3334             _lock_file(self.f, exclusive)
   3335         except IOError:
   3336             self.f.close()
   3337             raise
   3338         return self
   3339 
   3340     def __exit__(self, etype, value, traceback):
   3341         try:
   3342             _unlock_file(self.f)
   3343         finally:
   3344             self.f.close()
   3345 
   3346     def __iter__(self):
   3347         return iter(self.f)
   3348 
   3349     def write(self, *args):
   3350         return self.f.write(*args)
   3351 
   3352     def read(self, *args):
   3353         return self.f.read(*args)
   3354 
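        # Editor's note: a minimal usage sketch (not part of the original module);
        # the lock is taken in __enter__ and released in __exit__, so the file must
        # be used as a context manager ('/tmp/youtube-dl.example' is a made-up path):
        #   with locked_file('/tmp/youtube-dl.example', 'a', encoding='utf-8') as f:
        #       f.write('one line, written under an exclusive lock\n')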
   3355 
   3356 def get_filesystem_encoding():
   3357     encoding = sys.getfilesystemencoding()
   3358     return encoding if encoding is not None else 'utf-8'
   3359 
   3360 
   3361 def shell_quote(args):
   3362     quoted_args = []
   3363     encoding = get_filesystem_encoding()
   3364     for a in args:
   3365         if isinstance(a, bytes):
   3366             # We may get a filename encoded with 'encodeFilename'
   3367             a = a.decode(encoding)
   3368         quoted_args.append(compat_shlex_quote(a))
   3369     return ' '.join(quoted_args)
   3370 
   3371 
   3372 def smuggle_url(url, data):
   3373     """ Pass additional data in a URL for internal use. """
   3374 
   3375     url, idata = unsmuggle_url(url, {})
   3376     data.update(idata)
   3377     sdata = compat_urllib_parse_urlencode(
   3378         {'__youtubedl_smuggle': json.dumps(data)})
   3379     return url + '#' + sdata
   3380 
   3381 
   3382 def unsmuggle_url(smug_url, default=None):
   3383     if '#__youtubedl_smuggle' not in smug_url:
   3384         return smug_url, default
   3385     url, _, sdata = smug_url.rpartition('#')
   3386     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
   3387     data = json.loads(jsond)
   3388     return url, data
   3389 
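        # Editor's note: illustrative round trip (not part of the original module);
        # the URLs are made-up examples:
        #   >>> url = smuggle_url('http://example.com/video', {'referer': 'http://example.org'})
        #   >>> unsmuggle_url(url)
        #   ('http://example.com/video', {'referer': 'http://example.org'})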
   3390 
   3391 def format_bytes(bytes):
   3392     if bytes is None:
   3393         return 'N/A'
   3394     if type(bytes) is str:
   3395         bytes = float(bytes)
   3396     if bytes == 0.0:
   3397         exponent = 0
   3398     else:
   3399         exponent = int(math.log(bytes, 1024.0))
   3400     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
   3401     converted = float(bytes) / float(1024 ** exponent)
   3402     return '%.2f%s' % (converted, suffix)
   3403 
   3404 
   3405 def lookup_unit_table(unit_table, s):
   3406     units_re = '|'.join(re.escape(u) for u in unit_table)
   3407     m = re.match(
   3408         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
   3409     if not m:
   3410         return None
   3411     num_str = m.group('num').replace(',', '.')
   3412     mult = unit_table[m.group('unit')]
   3413     return int(float(num_str) * mult)
   3414 
   3415 
   3416 def parse_filesize(s):
   3417     if s is None:
   3418         return None
   3419 
   3420     # The lower-case forms are of course incorrect and unofficial,
   3421     # but we support those too
   3422     _UNIT_TABLE = {
   3423         'B': 1,
   3424         'b': 1,
   3425         'bytes': 1,
   3426         'KiB': 1024,
   3427         'KB': 1000,
   3428         'kB': 1024,
   3429         'Kb': 1000,
   3430         'kb': 1000,
   3431         'kilobytes': 1000,
   3432         'kibibytes': 1024,
   3433         'MiB': 1024 ** 2,
   3434         'MB': 1000 ** 2,
   3435         'mB': 1024 ** 2,
   3436         'Mb': 1000 ** 2,
   3437         'mb': 1000 ** 2,
   3438         'megabytes': 1000 ** 2,
   3439         'mebibytes': 1024 ** 2,
   3440         'GiB': 1024 ** 3,
   3441         'GB': 1000 ** 3,
   3442         'gB': 1024 ** 3,
   3443         'Gb': 1000 ** 3,
   3444         'gb': 1000 ** 3,
   3445         'gigabytes': 1000 ** 3,
   3446         'gibibytes': 1024 ** 3,
   3447         'TiB': 1024 ** 4,
   3448         'TB': 1000 ** 4,
   3449         'tB': 1024 ** 4,
   3450         'Tb': 1000 ** 4,
   3451         'tb': 1000 ** 4,
   3452         'terabytes': 1000 ** 4,
   3453         'tebibytes': 1024 ** 4,
   3454         'PiB': 1024 ** 5,
   3455         'PB': 1000 ** 5,
   3456         'pB': 1024 ** 5,
   3457         'Pb': 1000 ** 5,
   3458         'pb': 1000 ** 5,
   3459         'petabytes': 1000 ** 5,
   3460         'pebibytes': 1024 ** 5,
   3461         'EiB': 1024 ** 6,
   3462         'EB': 1000 ** 6,
   3463         'eB': 1024 ** 6,
   3464         'Eb': 1000 ** 6,
   3465         'eb': 1000 ** 6,
   3466         'exabytes': 1000 ** 6,
   3467         'exbibytes': 1024 ** 6,
   3468         'ZiB': 1024 ** 7,
   3469         'ZB': 1000 ** 7,
   3470         'zB': 1024 ** 7,
   3471         'Zb': 1000 ** 7,
   3472         'zb': 1000 ** 7,
   3473         'zettabytes': 1000 ** 7,
   3474         'zebibytes': 1024 ** 7,
   3475         'YiB': 1024 ** 8,
   3476         'YB': 1000 ** 8,
   3477         'yB': 1024 ** 8,
   3478         'Yb': 1000 ** 8,
   3479         'yb': 1000 ** 8,
   3480         'yottabytes': 1000 ** 8,
   3481         'yobibytes': 1024 ** 8,
   3482     }
   3483 
   3484     return lookup_unit_table(_UNIT_TABLE, s)
   3485 
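        # Editor's note: illustrative usage (not part of the original module);
        # binary and decimal units are distinguished:
        #   >>> parse_filesize('1.5 MiB')
        #   1572864
        #   >>> parse_filesize('10 MB')
        #   10000000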
   3486 
   3487 def parse_count(s):
   3488     if s is None:
   3489         return None
   3490 
   3491     s = s.strip()
   3492 
   3493     if re.match(r'^[\d,.]+$', s):
   3494         return str_to_int(s)
   3495 
   3496     _UNIT_TABLE = {
   3497         'k': 1000,
   3498         'K': 1000,
   3499         'm': 1000 ** 2,
   3500         'M': 1000 ** 2,
   3501         'kk': 1000 ** 2,
   3502         'KK': 1000 ** 2,
   3503     }
   3504 
   3505     return lookup_unit_table(_UNIT_TABLE, s)
   3506 
   3507 
   3508 def parse_resolution(s):
   3509     if s is None:
   3510         return {}
   3511 
   3512     mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
   3513     if mobj:
   3514         return {
   3515             'width': int(mobj.group('w')),
   3516             'height': int(mobj.group('h')),
   3517         }
   3518 
   3519     mobj = re.search(r'\b(\d+)[pPiI]\b', s)
   3520     if mobj:
   3521         return {'height': int(mobj.group(1))}
   3522 
   3523     mobj = re.search(r'\b([48])[kK]\b', s)
   3524     if mobj:
   3525         return {'height': int(mobj.group(1)) * 540}
   3526 
   3527     return {}
   3528 
   3529 
   3530 def parse_bitrate(s):
   3531     if not isinstance(s, compat_str):
   3532         return
   3533     mobj = re.search(r'\b(\d+)\s*kbps', s)
   3534     if mobj:
   3535         return int(mobj.group(1))
   3536 
   3537 
   3538 def month_by_name(name, lang='en'):
   3539     """ Return the number of a month by (locale-independently) English name """
   3540 
   3541     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
   3542 
   3543     try:
   3544         return month_names.index(name) + 1
   3545     except ValueError:
   3546         return None
   3547 
   3548 
   3549 def month_by_abbreviation(abbrev):
   3550     """ Return the number of a month by (locale-independently) English
   3551         abbreviations """
   3552 
   3553     try:
   3554         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
   3555     except ValueError:
   3556         return None
   3557 
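        # Editor's note: illustrative usage (not part of the original module),
        # assuming the MONTH_NAMES/ENGLISH_MONTH_NAMES tables defined elsewhere in
        # this file contain the English month names:
        #   >>> month_by_name('May')
        #   5
        #   >>> month_by_abbreviation('Mar')
        #   3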
   3558 
   3559 def fix_xml_ampersands(xml_str):
   3560     """Replace all the '&' by '&amp;' in XML"""
   3561     return re.sub(
   3562         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
   3563         '&amp;',
   3564         xml_str)
   3565 
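        # Editor's note: illustrative usage (not part of the original module);
        # already-escaped entities are left alone:
        #   >>> fix_xml_ampersands('Tom &amp; Jerry & friends')
        #   'Tom &amp; Jerry &amp; friends'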
   3566 
   3567 def setproctitle(title):
   3568     assert isinstance(title, compat_str)
   3569 
   3570     # ctypes in Jython is not complete
   3571     # http://bugs.jython.org/issue2148
   3572     if sys.platform.startswith('java'):
   3573         return
   3574 
   3575     try:
   3576         libc = ctypes.cdll.LoadLibrary('libc.so.6')
   3577     except OSError:
   3578         return
   3579     except TypeError:
   3580         # LoadLibrary in Windows Python 2.7.13 only expects
   3581         # a bytestring, but since unicode_literals turns
   3582         # every string into a unicode string, it fails.
   3583         return
   3584     title_bytes = title.encode('utf-8')
   3585     buf = ctypes.create_string_buffer(len(title_bytes))
   3586     buf.value = title_bytes
   3587     try:
   3588         libc.prctl(15, buf, 0, 0, 0)
   3589     except AttributeError:
   3590         return  # Strange libc, just skip this
   3591 
   3592 
   3593 def remove_start(s, start):
   3594     return s[len(start):] if s is not None and s.startswith(start) else s
   3595 
   3596 
   3597 def remove_end(s, end):
   3598     return s[:-len(end)] if s is not None and s.endswith(end) else s
   3599 
   3600 
   3601 def remove_quotes(s):
   3602     if s is None or len(s) < 2:
   3603         return s
   3604     for quote in ('"', "'", ):
   3605         if s[0] == quote and s[-1] == quote:
   3606             return s[1:-1]
   3607     return s
   3608 
   3609 
   3610 def url_basename(url):
   3611     path = compat_urlparse.urlparse(url).path
   3612     return path.strip('/').split('/')[-1]
   3613 
   3614 
   3615 def base_url(url):
   3616     return re.match(r'https?://[^?#&]+/', url).group()
   3617 
   3618 
   3619 def urljoin(base, path):
   3620     if isinstance(path, bytes):
   3621         path = path.decode('utf-8')
   3622     if not isinstance(path, compat_str) or not path:
   3623         return None
   3624     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
   3625         return path
   3626     if isinstance(base, bytes):
   3627         base = base.decode('utf-8')
   3628     if not isinstance(base, compat_str) or not re.match(
   3629             r'^(?:https?:)?//', base):
   3630         return None
   3631     return compat_urlparse.urljoin(base, path)
   3632 
   3633 
   3634 class HEADRequest(compat_urllib_request.Request):
   3635     def get_method(self):
   3636         return 'HEAD'
   3637 
   3638 
   3639 class PUTRequest(compat_urllib_request.Request):
   3640     def get_method(self):
   3641         return 'PUT'
   3642 
   3643 
   3644 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
   3645     if get_attr:
   3646         if v is not None:
   3647             v = getattr(v, get_attr, None)
   3648     if v == '':
   3649         v = None
   3650     if v is None:
   3651         return default
   3652     try:
   3653         return int(v) * invscale // scale
   3654     except (ValueError, TypeError):
   3655         return default
   3656 
   3657 
   3658 def str_or_none(v, default=None):
   3659     return default if v is None else compat_str(v)
   3660 
   3661 
   3662 def str_to_int(int_str):
   3663     """ A more relaxed version of int_or_none """
   3664     if isinstance(int_str, compat_integer_types):
   3665         return int_str
   3666     elif isinstance(int_str, compat_str):
   3667         int_str = re.sub(r'[,\.\+]', '', int_str)
   3668         return int_or_none(int_str)
   3669 
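        # Editor's note: illustrative usage (not part of the original module);
        # thousands separators are simply stripped:
        #   >>> str_to_int('123,456')
        #   123456
        #   >>> str_to_int('1.000.000')
        #   1000000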
   3670 
   3671 def float_or_none(v, scale=1, invscale=1, default=None):
   3672     if v is None:
   3673         return default
   3674     try:
   3675         return float(v) * invscale / scale
   3676     except (ValueError, TypeError):
   3677         return default
   3678 
   3679 
   3680 def bool_or_none(v, default=None):
   3681     return v if isinstance(v, bool) else default
   3682 
   3683 
   3684 def strip_or_none(v, default=None):
   3685     return v.strip() if isinstance(v, compat_str) else default
   3686 
   3687 
   3688 def url_or_none(url):
   3689     if not url or not isinstance(url, compat_str):
   3690         return None
   3691     url = url.strip()
   3692     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
   3693 
   3694 
   3695 def parse_duration(s):
   3696     if not isinstance(s, compat_basestring):
   3697         return None
   3698 
   3699     s = s.strip()
   3700 
   3701     days, hours, mins, secs, ms = [None] * 5
   3702     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
   3703     if m:
   3704         days, hours, mins, secs, ms = m.groups()
   3705     else:
   3706         m = re.match(
   3707             r'''(?ix)(?:P?
   3708                 (?:
   3709                     [0-9]+\s*y(?:ears?)?\s*
   3710                 )?
   3711                 (?:
   3712                     [0-9]+\s*m(?:onths?)?\s*
   3713                 )?
   3714                 (?:
   3715                     [0-9]+\s*w(?:eeks?)?\s*
   3716                 )?
   3717                 (?:
   3718                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
   3719                 )?
   3720                 T)?
   3721                 (?:
   3722                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
   3723                 )?
   3724                 (?:
   3725                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
   3726                 )?
   3727                 (?:
   3728                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
   3729                 )?Z?$''', s)
   3730         if m:
   3731             days, hours, mins, secs, ms = m.groups()
   3732         else:
   3733             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
   3734             if m:
   3735                 hours, mins = m.groups()
   3736             else:
   3737                 return None
   3738 
   3739     duration = 0
   3740     if secs:
   3741         duration += float(secs)
   3742     if mins:
   3743         duration += float(mins) * 60
   3744     if hours:
   3745         duration += float(hours) * 60 * 60
   3746     if days:
   3747         duration += float(days) * 24 * 60 * 60
   3748     if ms:
   3749         duration += float(ms)
   3750     return duration
   3751 
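        # Editor's note: a few of the accepted duration formats (illustrative,
        # not part of the original module):
        #   >>> parse_duration('1:02:03')
        #   3723.0
        #   >>> parse_duration('PT1H30M')
        #   5400.0
        #   >>> parse_duration('3 min')
        #   180.0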
   3752 
   3753 def prepend_extension(filename, ext, expected_real_ext=None):
   3754     name, real_ext = os.path.splitext(filename)
   3755     return (
   3756         '{0}.{1}{2}'.format(name, ext, real_ext)
   3757         if not expected_real_ext or real_ext[1:] == expected_real_ext
   3758         else '{0}.{1}'.format(filename, ext))
   3759 
   3760 
   3761 def replace_extension(filename, ext, expected_real_ext=None):
   3762     name, real_ext = os.path.splitext(filename)
   3763     return '{0}.{1}'.format(
   3764         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
   3765         ext)
   3766 
   3767 
   3768 def check_executable(exe, args=[]):
   3769     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
   3770     args can be a list of arguments for a short output (like -version) """
   3771     try:
   3772         subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
   3773     except OSError:
   3774         return False
   3775     return exe
   3776 
   3777 
   3778 def get_exe_version(exe, args=['--version'],
   3779                     version_re=None, unrecognized='present'):
   3780     """ Returns the version of the specified executable,
   3781     or False if the executable is not present """
   3782     try:
   3783         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
   3784         # SIGTTOU if youtube-dl is run in the background.
   3785         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
   3786         out, _ = subprocess.Popen(
   3787             [encodeArgument(exe)] + args,
   3788             stdin=subprocess.PIPE,
   3789             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
   3790     except OSError:
   3791         return False
   3792     if isinstance(out, bytes):  # Python 2.x
   3793         out = out.decode('ascii', 'ignore')
   3794     return detect_exe_version(out, version_re, unrecognized)
   3795 
   3796 
   3797 def detect_exe_version(output, version_re=None, unrecognized='present'):
   3798     assert isinstance(output, compat_str)
   3799     if version_re is None:
   3800         version_re = r'version\s+([-0-9._a-zA-Z]+)'
   3801     m = re.search(version_re, output)
   3802     if m:
   3803         return m.group(1)
   3804     else:
   3805         return unrecognized
   3806 
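        # Editor's note: illustrative usage of detect_exe_version(), which
        # get_exe_version() feeds with the binary's output (not part of the
        # original module; the version string is a made-up example):
        #   >>> detect_exe_version('ffmpeg version 4.2.2 Copyright (c) 2000-2019')
        #   '4.2.2'
        #   >>> detect_exe_version('unexpected output')
        #   'present'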
   3807 
   3808 class PagedList(object):
   3809     def __len__(self):
   3810         # This is only useful for tests
   3811         return len(self.getslice())
   3812 
   3813 
   3814 class OnDemandPagedList(PagedList):
   3815     def __init__(self, pagefunc, pagesize, use_cache=True):
   3816         self._pagefunc = pagefunc
   3817         self._pagesize = pagesize
   3818         self._use_cache = use_cache
   3819         if use_cache:
   3820             self._cache = {}
   3821 
   3822     def getslice(self, start=0, end=None):
   3823         res = []
   3824         for pagenum in itertools.count(start // self._pagesize):
   3825             firstid = pagenum * self._pagesize
   3826             nextfirstid = pagenum * self._pagesize + self._pagesize
   3827             if start >= nextfirstid:
   3828                 continue
   3829 
   3830             page_results = None
   3831             if self._use_cache:
   3832                 page_results = self._cache.get(pagenum)
   3833             if page_results is None:
   3834                 page_results = list(self._pagefunc(pagenum))
   3835             if self._use_cache:
   3836                 self._cache[pagenum] = page_results
   3837 
   3838             startv = (
   3839                 start % self._pagesize
   3840                 if firstid <= start < nextfirstid
   3841                 else 0)
   3842 
   3843             endv = (
   3844                 ((end - 1) % self._pagesize) + 1
   3845                 if (end is not None and firstid <= end <= nextfirstid)
   3846                 else None)
   3847 
   3848             if startv != 0 or endv is not None:
   3849                 page_results = page_results[startv:endv]
   3850             res.extend(page_results)
   3851 
   3852             # A little optimization: if the current page is not "full", i.e. it
   3853             # does not contain page_size videos, we can assume that this page is
   3854             # the last one - there are no more ids on further pages - so there
   3855             # is no need to query again.
   3856             if len(page_results) + startv < self._pagesize:
   3857                 break
   3858 
   3859             # If we got the whole page, but the next page is not interesting,
   3860             # break out early as well
   3861             if end == nextfirstid:
   3862                 break
   3863         return res
   3864 
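        # Editor's note: a minimal sketch of OnDemandPagedList (not part of the
        # original module); pagefunc returns the items of one page and is only
        # called for the pages the requested slice actually needs:
        #   >>> pages = OnDemandPagedList(lambda n: list(range(n * 3, min((n + 1) * 3, 8))), 3)
        #   >>> pages.getslice(2, 5)
        #   [2, 3, 4]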
   3865 
   3866 class InAdvancePagedList(PagedList):
   3867     def __init__(self, pagefunc, pagecount, pagesize):
   3868         self._pagefunc = pagefunc
   3869         self._pagecount = pagecount
   3870         self._pagesize = pagesize
   3871 
   3872     def getslice(self, start=0, end=None):
   3873         res = []
   3874         start_page = start // self._pagesize
   3875         end_page = (
   3876             self._pagecount if end is None else (end // self._pagesize + 1))
   3877         skip_elems = start - start_page * self._pagesize
   3878         only_more = None if end is None else end - start
   3879         for pagenum in range(start_page, end_page):
   3880             page = list(self._pagefunc(pagenum))
   3881             if skip_elems:
   3882                 page = page[skip_elems:]
   3883                 skip_elems = None
   3884             if only_more is not None:
   3885                 if len(page) < only_more:
   3886                     only_more -= len(page)
   3887                 else:
   3888                     page = page[:only_more]
   3889                     res.extend(page)
   3890                     break
   3891             res.extend(page)
   3892         return res
   3893 
   3894 
   3895 def uppercase_escape(s):
   3896     unicode_escape = codecs.getdecoder('unicode_escape')
   3897     return re.sub(
   3898         r'\\U[0-9a-fA-F]{8}',
   3899         lambda m: unicode_escape(m.group(0))[0],
   3900         s)
   3901 
   3902 
   3903 def lowercase_escape(s):
   3904     unicode_escape = codecs.getdecoder('unicode_escape')
   3905     return re.sub(
   3906         r'\\u[0-9a-fA-F]{4}',
   3907         lambda m: unicode_escape(m.group(0))[0],
   3908         s)
   3909 
   3910 
   3911 def escape_rfc3986(s):
   3912     """Escape non-ASCII characters as suggested by RFC 3986"""
   3913     if sys.version_info < (3, 0) and isinstance(s, compat_str):
   3914         s = s.encode('utf-8')
   3915     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
   3916 
   3917 
   3918 def escape_url(url):
   3919     """Escape URL as suggested by RFC 3986"""
   3920     url_parsed = compat_urllib_parse_urlparse(url)
   3921     return url_parsed._replace(
   3922         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
   3923         path=escape_rfc3986(url_parsed.path),
   3924         params=escape_rfc3986(url_parsed.params),
   3925         query=escape_rfc3986(url_parsed.query),
   3926         fragment=escape_rfc3986(url_parsed.fragment)
   3927     ).geturl()
   3928 
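        # Editor's note: illustrative usage (not part of the original module);
        # characters in the RFC 3986 "safe" set are left untouched:
        #   >>> escape_rfc3986('foo bar/baz')
        #   'foo%20bar/baz'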
   3929 
   3930 def read_batch_urls(batch_fd):
   3931     def fixup(url):
   3932         if not isinstance(url, compat_str):
   3933             url = url.decode('utf-8', 'replace')
   3934         BOM_UTF8 = '\xef\xbb\xbf'
   3935         if url.startswith(BOM_UTF8):
   3936             url = url[len(BOM_UTF8):]
   3937         url = url.strip()
   3938         if url.startswith(('#', ';', ']')):
   3939             return False
   3940         return url
   3941 
   3942     with contextlib.closing(batch_fd) as fd:
   3943         return [url for url in map(fixup, fd) if url]
   3944 
   3945 
   3946 def urlencode_postdata(*args, **kargs):
   3947     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
   3948 
   3949 
   3950 def update_url_query(url, query):
   3951     if not query:
   3952         return url
   3953     parsed_url = compat_urlparse.urlparse(url)
   3954     qs = compat_parse_qs(parsed_url.query)
   3955     qs.update(query)
   3956     return compat_urlparse.urlunparse(parsed_url._replace(
   3957         query=compat_urllib_parse_urlencode(qs, True)))
   3958 
   3959 
   3960 def update_Request(req, url=None, data=None, headers={}, query={}):
   3961     req_headers = req.headers.copy()
   3962     req_headers.update(headers)
   3963     req_data = data or req.data
   3964     req_url = update_url_query(url or req.get_full_url(), query)
   3965     req_get_method = req.get_method()
   3966     if req_get_method == 'HEAD':
   3967         req_type = HEADRequest
   3968     elif req_get_method == 'PUT':
   3969         req_type = PUTRequest
   3970     else:
   3971         req_type = compat_urllib_request.Request
   3972     new_req = req_type(
   3973         req_url, data=req_data, headers=req_headers,
   3974         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
   3975     if hasattr(req, 'timeout'):
   3976         new_req.timeout = req.timeout
   3977     return new_req
   3978 
   3979 
   3980 def _multipart_encode_impl(data, boundary):
   3981     content_type = 'multipart/form-data; boundary=%s' % boundary
   3982 
   3983     out = b''
   3984     for k, v in data.items():
   3985         out += b'--' + boundary.encode('ascii') + b'\r\n'
   3986         if isinstance(k, compat_str):
   3987             k = k.encode('utf-8')
   3988         if isinstance(v, compat_str):
   3989             v = v.encode('utf-8')
   3990         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
   3991         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
   3992         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
   3993         if boundary.encode('ascii') in content:
   3994             raise ValueError('Boundary overlaps with data')
   3995         out += content
   3996 
   3997     out += b'--' + boundary.encode('ascii') + b'--\r\n'
   3998 
   3999     return out, content_type
   4000 
   4001 
   4002 def multipart_encode(data, boundary=None):
   4003     '''
   4004     Encode a dict to RFC 7578-compliant form-data
   4005 
   4006     data:
   4007         A dict where keys and values can be either Unicode or bytes-like
   4008         objects.
   4009     boundary:
   4010         If specified, it must be a Unicode object and is used as the boundary.
   4011         Otherwise a random boundary is generated.
   4012 
   4013     Reference: https://tools.ietf.org/html/rfc7578
   4014     '''
   4015     has_specified_boundary = boundary is not None
   4016 
   4017     while True:
   4018         if boundary is None:
   4019             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
   4020 
   4021         try:
   4022             out, content_type = _multipart_encode_impl(data, boundary)
   4023             break
   4024         except ValueError:
   4025             if has_specified_boundary:
   4026                 raise
   4027             boundary = None
   4028 
   4029     return out, content_type
   4030 
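        # Editor's note: an illustrative sketch (not part of the original module);
        # with a fixed boundary the output is deterministic:
        #   >>> body, ctype = multipart_encode({'field': 'value'}, boundary='X')
        #   >>> ctype
        #   'multipart/form-data; boundary=X'
        #   >>> body
        #   b'--X\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--X--\r\n'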
   4031 
   4032 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
   4033     if isinstance(key_or_keys, (list, tuple)):
   4034         for key in key_or_keys:
   4035             if key not in d or d[key] is None or skip_false_values and not d[key]:
   4036                 continue
   4037             return d[key]
   4038         return default
   4039     return d.get(key_or_keys, default)
   4040 
   4041 
   4042 def try_get(src, getter, expected_type=None):
   4043     if not isinstance(getter, (list, tuple)):
   4044         getter = [getter]
   4045     for get in getter:
   4046         try:
   4047             v = get(src)
   4048         except (AttributeError, KeyError, TypeError, IndexError):
   4049             pass
   4050         else:
   4051             if expected_type is None or isinstance(v, expected_type):
   4052                 return v
   4053 
   4054 
   4055 def merge_dicts(*dicts):
   4056     merged = {}
   4057     for a_dict in dicts:
   4058         for k, v in a_dict.items():
   4059             if v is None:
   4060                 continue
   4061             if (k not in merged
   4062                     or (isinstance(v, compat_str) and v
   4063                         and isinstance(merged[k], compat_str)
   4064                         and not merged[k])):
   4065                 merged[k] = v
   4066     return merged
   4067 
   4068 
   4069 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
   4070     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
   4071 
   4072 
   4073 US_RATINGS = {
   4074     'G': 0,
   4075     'PG': 10,
   4076     'PG-13': 13,
   4077     'R': 16,
   4078     'NC': 18,
   4079 }
   4080 
   4081 
   4082 TV_PARENTAL_GUIDELINES = {
   4083     'TV-Y': 0,
   4084     'TV-Y7': 7,
   4085     'TV-G': 0,
   4086     'TV-PG': 0,
   4087     'TV-14': 14,
   4088     'TV-MA': 17,
   4089 }
   4090 
   4091 
   4092 def parse_age_limit(s):
   4093     if type(s) == int:
   4094         return s if 0 <= s <= 21 else None
   4095     if not isinstance(s, compat_basestring):
   4096         return None
   4097     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
   4098     if m:
   4099         return int(m.group('age'))
   4100     if s in US_RATINGS:
   4101         return US_RATINGS[s]
   4102     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
   4103     if m:
   4104         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
   4105     return None
   4106 
   4107 
   4108 def strip_jsonp(code):
   4109     return re.sub(
   4110         r'''(?sx)^
   4111             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
   4112             (?:\s*&&\s*(?P=func_name))?
   4113             \s*\(\s*(?P<callback_data>.*)\);?
   4114             \s*?(?://[^\n]*)*$''',
   4115         r'\g<callback_data>', code)
   4116 
   4117 
   4118 def js_to_json(code):
   4119     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
   4120     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
   4121     INTEGER_TABLE = (
   4122         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
   4123         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
   4124     )
   4125 
   4126     def fix_kv(m):
   4127         v = m.group(0)
   4128         if v in ('true', 'false', 'null'):
   4129             return v
   4130         elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
   4131             return ""
   4132 
   4133         if v[0] in ("'", '"'):
   4134             v = re.sub(r'(?s)\\.|"', lambda m: {
   4135                 '"': '\\"',
   4136                 "\\'": "'",
   4137                 '\\\n': '',
   4138                 '\\x': '\\u00',
   4139             }.get(m.group(0), m.group(0)), v[1:-1])
   4140         else:
   4141             for regex, base in INTEGER_TABLE:
   4142                 im = re.match(regex, v)
   4143                 if im:
   4144                     i = int(im.group(1), base)
   4145                     return '"%d":' % i if v.endswith(':') else '%d' % i
   4146 
   4147         return '"%s"' % v
   4148 
   4149     return re.sub(r'''(?sx)
   4150         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
   4151         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
   4152         {comment}|,(?={skip}[\]}}])|
   4153         (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
   4154         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
   4155         [0-9]+(?={skip}:)|
   4156         !+
   4157         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
   4158 
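        # Editor's note: illustrative usage (not part of the original module);
        # js_to_json() relaxes JavaScript object literals into strict JSON:
        #   >>> js_to_json("{abc: 'def', ghi: 0x1A, jkl: true,}")
        #   '{"abc": "def", "ghi": 26, "jkl": true}'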
   4159 
   4160 def qualities(quality_ids):
   4161     """ Get a numeric quality value out of a list of possible values """
   4162     def q(qid):
   4163         try:
   4164             return quality_ids.index(qid)
   4165         except ValueError:
   4166             return -1
   4167     return q
   4168 
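        # Editor's note: illustrative usage (not part of the original module);
        # higher list positions mean better quality, unknown ids map to -1:
        #   >>> q = qualities(['240p', '360p', '720p'])
        #   >>> q('720p'), q('1080p')
        #   (2, -1)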
   4169 
   4170 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
   4171 
   4172 
   4173 def limit_length(s, length):
   4174     """ Add ellipses to overly long strings """
   4175     if s is None:
   4176         return None
   4177     ELLIPSES = '...'
   4178     if len(s) > length:
   4179         return s[:length - len(ELLIPSES)] + ELLIPSES
   4180     return s
   4181 
   4182 
   4183 def version_tuple(v):
   4184     return tuple(int(e) for e in re.split(r'[-.]', v))
   4185 
   4186 
   4187 def is_outdated_version(version, limit, assume_new=True):
   4188     if not version:
   4189         return not assume_new
   4190     try:
   4191         return version_tuple(version) < version_tuple(limit)
   4192     except ValueError:
   4193         return not assume_new
   4194 
   4195 
   4196 def ytdl_is_updateable():
   4197     """ Returns if youtube-dl can be updated with -U """
   4198     from zipimport import zipimporter
   4199 
   4200     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
   4201 
   4202 
   4203 def args_to_str(args):
   4204     # Get a short string representation for a subprocess command
   4205     return ' '.join(compat_shlex_quote(a) for a in args)
   4206 
   4207 
   4208 def error_to_compat_str(err):
   4209     err_str = str(err)
   4210     # On Python 2 the error byte string must be decoded with the proper
   4211     # encoding rather than ASCII
   4212     if sys.version_info[0] < 3:
   4213         err_str = err_str.decode(preferredencoding())
   4214     return err_str
   4215 
   4216 
   4217 def mimetype2ext(mt):
   4218     if mt is None:
   4219         return None
   4220 
   4221     ext = {
   4222         'audio/mp4': 'm4a',
   4223         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
   4224         # as it's the most popular one
   4225         'audio/mpeg': 'mp3',
   4226     }.get(mt)
   4227     if ext is not None:
   4228         return ext
   4229 
   4230     _, _, res = mt.rpartition('/')
   4231     res = res.split(';')[0].strip().lower()
   4232 
   4233     return {
   4234         '3gpp': '3gp',
   4235         'smptett+xml': 'tt',
   4236         'ttaf+xml': 'dfxp',
   4237         'ttml+xml': 'ttml',
   4238         'x-flv': 'flv',
   4239         'x-mp4-fragmented': 'mp4',
   4240         'x-ms-sami': 'sami',
   4241         'x-ms-wmv': 'wmv',
   4242         'mpegurl': 'm3u8',
   4243         'x-mpegurl': 'm3u8',
   4244         'vnd.apple.mpegurl': 'm3u8',
   4245         'dash+xml': 'mpd',
   4246         'f4m+xml': 'f4m',
   4247         'hds+xml': 'f4m',
   4248         'vnd.ms-sstr+xml': 'ism',
   4249         'quicktime': 'mov',
   4250         'mp2t': 'ts',
   4251         'x-wav': 'wav',
   4252     }.get(res, res)
   4253 
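        # Editor's note: illustrative usage (not part of the original module):
        #   >>> mimetype2ext('audio/mpeg')
        #   'mp3'
        #   >>> mimetype2ext('application/vnd.apple.mpegurl')
        #   'm3u8'
        #   >>> mimetype2ext('video/x-flv')
        #   'flv'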
   4254 
   4255 def parse_codecs(codecs_str):
   4256     # http://tools.ietf.org/html/rfc6381
   4257     if not codecs_str:
   4258         return {}
   4259     split_codecs = list(filter(None, map(
   4260         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
   4261     vcodec, acodec = None, None
   4262     for full_codec in split_codecs:
   4263         codec = full_codec.split('.')[0]
   4264         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
   4265             if not vcodec:
   4266                 vcodec = full_codec
   4267         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
   4268             if not acodec:
   4269                 acodec = full_codec
   4270         else:
   4271             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
   4272     if not vcodec and not acodec:
   4273         if len(split_codecs) == 2:
   4274             return {
   4275                 'vcodec': split_codecs[0],
   4276                 'acodec': split_codecs[1],
   4277             }
   4278     else:
   4279         return {
   4280             'vcodec': vcodec or 'none',
   4281             'acodec': acodec or 'none',
   4282         }
   4283     return {}
   4284 
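        # Editor's note: illustrative usage (not part of the original module);
        # the codec strings follow RFC 6381:
        #   >>> parse_codecs('avc1.64001f, mp4a.40.2')
        #   {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}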
   4285 
   4286 def urlhandle_detect_ext(url_handle):
   4287     getheader = url_handle.headers.get
   4288 
   4289     cd = getheader('Content-Disposition')
   4290     if cd:
   4291         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
   4292         if m:
   4293             e = determine_ext(m.group('filename'), default_ext=None)
   4294             if e:
   4295                 return e
   4296 
   4297     return mimetype2ext(getheader('Content-Type'))
   4298 
   4299 
   4300 def encode_data_uri(data, mime_type):
   4301     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
   4302 
   4303 
   4304 def age_restricted(content_limit, age_limit):
   4305     """ Returns True iff the content should be blocked """
   4306 
   4307     if age_limit is None:  # No limit set
   4308         return False
   4309     if content_limit is None:
   4310         return False  # Content available for everyone
   4311     return age_limit < content_limit
   4312 
   4313 
   4314 def is_html(first_bytes):
   4315     """ Detect whether a file contains HTML by examining its first bytes. """
   4316 
   4317     BOMS = [
   4318         (b'\xef\xbb\xbf', 'utf-8'),
   4319         (b'\x00\x00\xfe\xff', 'utf-32-be'),
   4320         (b'\xff\xfe\x00\x00', 'utf-32-le'),
   4321         (b'\xff\xfe', 'utf-16-le'),
   4322         (b'\xfe\xff', 'utf-16-be'),
   4323     ]
   4324     for bom, enc in BOMS:
   4325         if first_bytes.startswith(bom):
   4326             s = first_bytes[len(bom):].decode(enc, 'replace')
   4327             break
   4328     else:
   4329         s = first_bytes.decode('utf-8', 'replace')
   4330 
   4331     return re.match(r'^\s*<', s)
   4332 
   4333 
   4334 def determine_protocol(info_dict):
   4335     protocol = info_dict.get('protocol')
   4336     if protocol is not None:
   4337         return protocol
   4338 
   4339     url = info_dict['url']
   4340     if url.startswith('rtmp'):
   4341         return 'rtmp'
   4342     elif url.startswith('mms'):
   4343         return 'mms'
   4344     elif url.startswith('rtsp'):
   4345         return 'rtsp'
   4346 
   4347     ext = determine_ext(url)
   4348     if ext == 'm3u8':
   4349         return 'm3u8'
   4350     elif ext == 'f4m':
   4351         return 'f4m'
   4352 
   4353     return compat_urllib_parse_urlparse(url).scheme
   4354 
   4355 
   4356 def render_table(header_row, data):
   4357     """ Render a list of rows, each as a list of values """
   4358     table = [header_row] + data
   4359     max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
   4360     format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
   4361     return '\n'.join(format_str % tuple(row) for row in table)
   4362 
   4363 
   4364 def _match_one(filter_part, dct):
   4365     COMPARISON_OPERATORS = {
   4366         '<': operator.lt,
   4367         '<=': operator.le,
   4368         '>': operator.gt,
   4369         '>=': operator.ge,
   4370         '=': operator.eq,
   4371         '!=': operator.ne,
   4372     }
   4373     operator_rex = re.compile(r'''(?x)\s*
   4374         (?P<key>[a-z_]+)
   4375         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
   4376         (?:
   4377             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
   4378             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
   4379             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
   4380         )
   4381         \s*$
   4382         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
   4383     m = operator_rex.search(filter_part)
   4384     if m:
   4385         op = COMPARISON_OPERATORS[m.group('op')]
   4386         actual_value = dct.get(m.group('key'))
   4387         if (m.group('quotedstrval') is not None
   4388             or m.group('strval') is not None
   4389             # If the original field is a string and the matching comparison value
   4390             # is a number, we should respect the origin of the original field
   4391             # and process the comparison value as a string (see
   4392             # https://github.com/ytdl-org/youtube-dl/issues/11082).
   4393             or actual_value is not None and m.group('intval') is not None
   4394                 and isinstance(actual_value, compat_str)):
   4395             if m.group('op') not in ('=', '!='):
   4396                 raise ValueError(
   4397                     'Operator %s does not support string values!' % m.group('op'))
   4398             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
   4399             quote = m.group('quote')
   4400             if quote is not None:
   4401                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
   4402         else:
   4403             try:
   4404                 comparison_value = int(m.group('intval'))
   4405             except ValueError:
   4406                 comparison_value = parse_filesize(m.group('intval'))
   4407                 if comparison_value is None:
   4408                     comparison_value = parse_filesize(m.group('intval') + 'B')
   4409                 if comparison_value is None:
   4410                     raise ValueError(
   4411                         'Invalid integer value %r in filter part %r' % (
   4412                             m.group('intval'), filter_part))
   4413         if actual_value is None:
   4414             return m.group('none_inclusive')
   4415         return op(actual_value, comparison_value)
   4416 
   4417     UNARY_OPERATORS = {
   4418         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
   4419         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
   4420     }
   4421     operator_rex = re.compile(r'''(?x)\s*
   4422         (?P<op>%s)\s*(?P<key>[a-z_]+)
   4423         \s*$
   4424         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
   4425     m = operator_rex.search(filter_part)
   4426     if m:
   4427         op = UNARY_OPERATORS[m.group('op')]
   4428         actual_value = dct.get(m.group('key'))
   4429         return op(actual_value)
   4430 
   4431     raise ValueError('Invalid filter part %r' % filter_part)
   4432 
   4433 
   4434 def match_str(filter_str, dct):
   4435     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
   4436 
   4437     return all(
   4438         _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
   4439 
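        # Editor's note: illustrative usage (not part of the original module);
        # '&' separates conditions, '!' negates, and bare keys test for presence:
        #   >>> match_str('duration > 60 & like_count', {'duration': 90, 'like_count': 3})
        #   True
        #   >>> match_str('!is_live & height >= 720', {'is_live': False, 'height': 480})
        #   False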
   4440 
   4441 def match_filter_func(filter_str):
   4442     def _match_func(info_dict):
   4443         if match_str(filter_str, info_dict):
   4444             return None
   4445         else:
   4446             video_title = info_dict.get('title', info_dict.get('id', 'video'))
   4447             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
   4448     return _match_func
   4449 
   4450 
   4451 def parse_dfxp_time_expr(time_expr):
   4452     if not time_expr:
   4453         return
   4454 
   4455     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
   4456     if mobj:
   4457         return float(mobj.group('time_offset'))
   4458 
   4459     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
   4460     if mobj:
   4461         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
   4462 
   4463 
   4464 def srt_subtitles_timecode(seconds):
   4465     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
   4466 
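        # Editor's note: illustrative usage of the two time helpers above
        # (not part of the original module):
        #   >>> parse_dfxp_time_expr('00:01:01.5')
        #   61.5
        #   >>> srt_subtitles_timecode(3661.5)
        #   '01:01:01,500'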
   4467 
   4468 def dfxp2srt(dfxp_data):
   4469     '''
   4470     @param dfxp_data A bytes-like object containing DFXP data
   4471     @returns A unicode object containing converted SRT data
   4472     '''
   4473     LEGACY_NAMESPACES = (
   4474         (b'http://www.w3.org/ns/ttml', [
   4475             b'http://www.w3.org/2004/11/ttaf1',
   4476             b'http://www.w3.org/2006/04/ttaf1',
   4477             b'http://www.w3.org/2006/10/ttaf1',
   4478         ]),
   4479         (b'http://www.w3.org/ns/ttml#styling', [
   4480             b'http://www.w3.org/ns/ttml#style',
   4481         ]),
   4482     )
   4483 
   4484     SUPPORTED_STYLING = [
   4485         'color',
   4486         'fontFamily',
   4487         'fontSize',
   4488         'fontStyle',
   4489         'fontWeight',
   4490         'textDecoration'
   4491     ]
   4492 
   4493     _x = functools.partial(xpath_with_ns, ns_map={
   4494         'xml': 'http://www.w3.org/XML/1998/namespace',
   4495         'ttml': 'http://www.w3.org/ns/ttml',
   4496         'tts': 'http://www.w3.org/ns/ttml#styling',
   4497     })
   4498 
   4499     styles = {}
   4500     default_style = {}
   4501 
   4502     class TTMLPElementParser(object):
   4503         _out = ''
   4504         _unclosed_elements = []
   4505         _applied_styles = []
   4506 
   4507         def start(self, tag, attrib):
   4508             if tag in (_x('ttml:br'), 'br'):
   4509                 self._out += '\n'
   4510             else:
   4511                 unclosed_elements = []
   4512                 style = {}
   4513                 element_style_id = attrib.get('style')
   4514                 if default_style:
   4515                     style.update(default_style)
   4516                 if element_style_id:
   4517                     style.update(styles.get(element_style_id, {}))
   4518                 for prop in SUPPORTED_STYLING:
   4519                     prop_val = attrib.get(_x('tts:' + prop))
   4520                     if prop_val:
   4521                         style[prop] = prop_val
   4522                 if style:
   4523                     font = ''
   4524                     for k, v in sorted(style.items()):
   4525                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
   4526                             continue
   4527                         if k == 'color':
   4528                             font += ' color="%s"' % v
   4529                         elif k == 'fontSize':
   4530                             font += ' size="%s"' % v
   4531                         elif k == 'fontFamily':
   4532                             font += ' face="%s"' % v
   4533                         elif k == 'fontWeight' and v == 'bold':
   4534                             self._out += '<b>'
   4535                             unclosed_elements.append('b')
   4536                         elif k == 'fontStyle' and v == 'italic':
   4537                             self._out += '<i>'
   4538                             unclosed_elements.append('i')
   4539                         elif k == 'textDecoration' and v == 'underline':
   4540                             self._out += '<u>'
   4541                             unclosed_elements.append('u')
   4542                     if font:
   4543                         self._out += '<font' + font + '>'
   4544                         unclosed_elements.append('font')
   4545                     applied_style = {}
   4546                     if self._applied_styles:
   4547                         applied_style.update(self._applied_styles[-1])
   4548                     applied_style.update(style)
   4549                     self._applied_styles.append(applied_style)
   4550                 self._unclosed_elements.append(unclosed_elements)
   4551 
   4552         def end(self, tag):
   4553             if tag not in (_x('ttml:br'), 'br'):
   4554                 unclosed_elements = self._unclosed_elements.pop()
   4555                 for element in reversed(unclosed_elements):
   4556                     self._out += '</%s>' % element
   4557                 if unclosed_elements and self._applied_styles:
   4558                     self._applied_styles.pop()
   4559 
   4560         def data(self, data):
   4561             self._out += data
   4562 
   4563         def close(self):
   4564             return self._out.strip()
   4565 
   4566     def parse_node(node):
   4567         target = TTMLPElementParser()
   4568         parser = xml.etree.ElementTree.XMLParser(target=target)
   4569         parser.feed(xml.etree.ElementTree.tostring(node))
   4570         return parser.close()
   4571 
   4572     for k, v in LEGACY_NAMESPACES:
   4573         for ns in v:
   4574             dfxp_data = dfxp_data.replace(ns, k)
   4575 
   4576     dfxp = compat_etree_fromstring(dfxp_data)
   4577     out = []
   4578     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
   4579 
   4580     if not paras:
   4581         raise ValueError('Invalid dfxp/TTML subtitle')
   4582 
   4583     repeat = False
   4584     while True:
   4585         for style in dfxp.findall(_x('.//ttml:style')):
   4586             style_id = style.get('id') or style.get(_x('xml:id'))
   4587             if not style_id:
   4588                 continue
   4589             parent_style_id = style.get('style')
   4590             if parent_style_id:
   4591                 if parent_style_id not in styles:
   4592                     repeat = True
   4593                     continue
   4594                 styles[style_id] = styles[parent_style_id].copy()
   4595             for prop in SUPPORTED_STYLING:
   4596                 prop_val = style.get(_x('tts:' + prop))
   4597                 if prop_val:
   4598                     styles.setdefault(style_id, {})[prop] = prop_val
   4599         if repeat:
   4600             repeat = False
   4601         else:
   4602             break
   4603 
   4604     for p in ('body', 'div'):
   4605         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
   4606         if ele is None:
   4607             continue
   4608         style = styles.get(ele.get('style'))
   4609         if not style:
   4610             continue
   4611         default_style.update(style)
   4612 
   4613     for para, index in zip(paras, itertools.count(1)):
   4614         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
   4615         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
   4616         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
   4617         if begin_time is None:
   4618             continue
   4619         if not end_time:
   4620             if not dur:
   4621                 continue
   4622             end_time = begin_time + dur
   4623         out.append('%d\n%s --> %s\n%s\n\n' % (
   4624             index,
   4625             srt_subtitles_timecode(begin_time),
   4626             srt_subtitles_timecode(end_time),
   4627             parse_node(para)))
   4628 
   4629     return ''.join(out)
   4630 
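# Usage sketch for the DFXP/TTML-to-SRT converter above (hedged; the sample
# markup is hypothetical). Feeding it raw TTML bytes such as
#   b'<tt xmlns="http://www.w3.org/ns/ttml"><body><div>'
#   b'<p begin="0s" end="1s">Hello</p></div></body></tt>'
# should yield a single SRT cue along the lines of:
#   1
#   00:00:00,000 --> 00:00:01,000
#   Hello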
   4631 
   4632 def cli_option(params, command_option, param):
   4633     param = params.get(param)
   4634     if param:
   4635         param = compat_str(param)
   4636     return [command_option, param] if param is not None else []
   4637 
   4638 
   4639 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
   4640     param = params.get(param)
   4641     if param is None:
   4642         return []
   4643     assert isinstance(param, bool)
   4644     if separator:
   4645         return [command_option + separator + (true_value if param else false_value)]
   4646     return [command_option, true_value if param else false_value]
   4647 
   4648 
   4649 def cli_valueless_option(params, command_option, param, expected_value=True):
   4650     param = params.get(param)
   4651     return [command_option] if param == expected_value else []
   4652 
   4653 
   4654 def cli_configuration_args(params, param, default=[]):
   4655     ex_args = params.get(param)
   4656     if ex_args is None:
   4657         return default
   4658     assert isinstance(ex_args, list)
   4659     return ex_args
   4660 
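# Usage sketches for the CLI helpers above (hedged; the option names are only
# illustrative):
#   >>> cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy')
#   ['--proxy', '127.0.0.1:3128']
#   >>> cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
#   ['--no-check-certificate', 'true']
#   >>> cli_valueless_option({'quiet': True}, '--quiet', 'quiet')
#   ['--quiet']
# Missing parameters yield an empty list, so the results can be concatenated
# straight into an external downloader's argument list.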
   4661 
   4662 class ISO639Utils(object):
   4663     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
   4664     _lang_map = {
   4665         'aa': 'aar',
   4666         'ab': 'abk',
   4667         'ae': 'ave',
   4668         'af': 'afr',
   4669         'ak': 'aka',
   4670         'am': 'amh',
   4671         'an': 'arg',
   4672         'ar': 'ara',
   4673         'as': 'asm',
   4674         'av': 'ava',
   4675         'ay': 'aym',
   4676         'az': 'aze',
   4677         'ba': 'bak',
   4678         'be': 'bel',
   4679         'bg': 'bul',
   4680         'bh': 'bih',
   4681         'bi': 'bis',
   4682         'bm': 'bam',
   4683         'bn': 'ben',
   4684         'bo': 'bod',
   4685         'br': 'bre',
   4686         'bs': 'bos',
   4687         'ca': 'cat',
   4688         'ce': 'che',
   4689         'ch': 'cha',
   4690         'co': 'cos',
   4691         'cr': 'cre',
   4692         'cs': 'ces',
   4693         'cu': 'chu',
   4694         'cv': 'chv',
   4695         'cy': 'cym',
   4696         'da': 'dan',
   4697         'de': 'deu',
   4698         'dv': 'div',
   4699         'dz': 'dzo',
   4700         'ee': 'ewe',
   4701         'el': 'ell',
   4702         'en': 'eng',
   4703         'eo': 'epo',
   4704         'es': 'spa',
   4705         'et': 'est',
   4706         'eu': 'eus',
   4707         'fa': 'fas',
   4708         'ff': 'ful',
   4709         'fi': 'fin',
   4710         'fj': 'fij',
   4711         'fo': 'fao',
   4712         'fr': 'fra',
   4713         'fy': 'fry',
   4714         'ga': 'gle',
   4715         'gd': 'gla',
   4716         'gl': 'glg',
   4717         'gn': 'grn',
   4718         'gu': 'guj',
   4719         'gv': 'glv',
   4720         'ha': 'hau',
   4721         'he': 'heb',
   4722         'iw': 'heb',  # Replaced by he in 1989 revision
   4723         'hi': 'hin',
   4724         'ho': 'hmo',
   4725         'hr': 'hrv',
   4726         'ht': 'hat',
   4727         'hu': 'hun',
   4728         'hy': 'hye',
   4729         'hz': 'her',
   4730         'ia': 'ina',
   4731         'id': 'ind',
   4732         'in': 'ind',  # Replaced by id in 1989 revision
   4733         'ie': 'ile',
   4734         'ig': 'ibo',
   4735         'ii': 'iii',
   4736         'ik': 'ipk',
   4737         'io': 'ido',
   4738         'is': 'isl',
   4739         'it': 'ita',
   4740         'iu': 'iku',
   4741         'ja': 'jpn',
   4742         'jv': 'jav',
   4743         'ka': 'kat',
   4744         'kg': 'kon',
   4745         'ki': 'kik',
   4746         'kj': 'kua',
   4747         'kk': 'kaz',
   4748         'kl': 'kal',
   4749         'km': 'khm',
   4750         'kn': 'kan',
   4751         'ko': 'kor',
   4752         'kr': 'kau',
   4753         'ks': 'kas',
   4754         'ku': 'kur',
   4755         'kv': 'kom',
   4756         'kw': 'cor',
   4757         'ky': 'kir',
   4758         'la': 'lat',
   4759         'lb': 'ltz',
   4760         'lg': 'lug',
   4761         'li': 'lim',
   4762         'ln': 'lin',
   4763         'lo': 'lao',
   4764         'lt': 'lit',
   4765         'lu': 'lub',
   4766         'lv': 'lav',
   4767         'mg': 'mlg',
   4768         'mh': 'mah',
   4769         'mi': 'mri',
   4770         'mk': 'mkd',
   4771         'ml': 'mal',
   4772         'mn': 'mon',
   4773         'mr': 'mar',
   4774         'ms': 'msa',
   4775         'mt': 'mlt',
   4776         'my': 'mya',
   4777         'na': 'nau',
   4778         'nb': 'nob',
   4779         'nd': 'nde',
   4780         'ne': 'nep',
   4781         'ng': 'ndo',
   4782         'nl': 'nld',
   4783         'nn': 'nno',
   4784         'no': 'nor',
   4785         'nr': 'nbl',
   4786         'nv': 'nav',
   4787         'ny': 'nya',
   4788         'oc': 'oci',
   4789         'oj': 'oji',
   4790         'om': 'orm',
   4791         'or': 'ori',
   4792         'os': 'oss',
   4793         'pa': 'pan',
   4794         'pi': 'pli',
   4795         'pl': 'pol',
   4796         'ps': 'pus',
   4797         'pt': 'por',
   4798         'qu': 'que',
   4799         'rm': 'roh',
   4800         'rn': 'run',
   4801         'ro': 'ron',
   4802         'ru': 'rus',
   4803         'rw': 'kin',
   4804         'sa': 'san',
   4805         'sc': 'srd',
   4806         'sd': 'snd',
   4807         'se': 'sme',
   4808         'sg': 'sag',
   4809         'si': 'sin',
   4810         'sk': 'slk',
   4811         'sl': 'slv',
   4812         'sm': 'smo',
   4813         'sn': 'sna',
   4814         'so': 'som',
   4815         'sq': 'sqi',
   4816         'sr': 'srp',
   4817         'ss': 'ssw',
   4818         'st': 'sot',
   4819         'su': 'sun',
   4820         'sv': 'swe',
   4821         'sw': 'swa',
   4822         'ta': 'tam',
   4823         'te': 'tel',
   4824         'tg': 'tgk',
   4825         'th': 'tha',
   4826         'ti': 'tir',
   4827         'tk': 'tuk',
   4828         'tl': 'tgl',
   4829         'tn': 'tsn',
   4830         'to': 'ton',
   4831         'tr': 'tur',
   4832         'ts': 'tso',
   4833         'tt': 'tat',
   4834         'tw': 'twi',
   4835         'ty': 'tah',
   4836         'ug': 'uig',
   4837         'uk': 'ukr',
   4838         'ur': 'urd',
   4839         'uz': 'uzb',
   4840         've': 'ven',
   4841         'vi': 'vie',
   4842         'vo': 'vol',
   4843         'wa': 'wln',
   4844         'wo': 'wol',
   4845         'xh': 'xho',
   4846         'yi': 'yid',
   4847         'ji': 'yid',  # Replaced by yi in 1989 revision
   4848         'yo': 'yor',
   4849         'za': 'zha',
   4850         'zh': 'zho',
   4851         'zu': 'zul',
   4852     }
   4853 
   4854     @classmethod
   4855     def short2long(cls, code):
   4856         """Convert language code from ISO 639-1 to ISO 639-2/T"""
   4857         return cls._lang_map.get(code[:2])
   4858 
   4859     @classmethod
   4860     def long2short(cls, code):
   4861         """Convert language code from ISO 639-2/T to ISO 639-1"""
   4862         for short_name, long_name in cls._lang_map.items():
   4863             if long_name == code:
   4864                 return short_name
   4865 
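# Usage sketch (hedged):
#   >>> ISO639Utils.short2long('en')
#   'eng'
#   >>> ISO639Utils.long2short('deu')
#   'de'
# Unknown codes yield None in both directions.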
   4866 
   4867 class ISO3166Utils(object):
   4868     # From http://data.okfn.org/data/core/country-list
   4869     _country_map = {
   4870         'AF': 'Afghanistan',
   4871         'AX': 'Åland Islands',
   4872         'AL': 'Albania',
   4873         'DZ': 'Algeria',
   4874         'AS': 'American Samoa',
   4875         'AD': 'Andorra',
   4876         'AO': 'Angola',
   4877         'AI': 'Anguilla',
   4878         'AQ': 'Antarctica',
   4879         'AG': 'Antigua and Barbuda',
   4880         'AR': 'Argentina',
   4881         'AM': 'Armenia',
   4882         'AW': 'Aruba',
   4883         'AU': 'Australia',
   4884         'AT': 'Austria',
   4885         'AZ': 'Azerbaijan',
   4886         'BS': 'Bahamas',
   4887         'BH': 'Bahrain',
   4888         'BD': 'Bangladesh',
   4889         'BB': 'Barbados',
   4890         'BY': 'Belarus',
   4891         'BE': 'Belgium',
   4892         'BZ': 'Belize',
   4893         'BJ': 'Benin',
   4894         'BM': 'Bermuda',
   4895         'BT': 'Bhutan',
   4896         'BO': 'Bolivia, Plurinational State of',
   4897         'BQ': 'Bonaire, Sint Eustatius and Saba',
   4898         'BA': 'Bosnia and Herzegovina',
   4899         'BW': 'Botswana',
   4900         'BV': 'Bouvet Island',
   4901         'BR': 'Brazil',
   4902         'IO': 'British Indian Ocean Territory',
   4903         'BN': 'Brunei Darussalam',
   4904         'BG': 'Bulgaria',
   4905         'BF': 'Burkina Faso',
   4906         'BI': 'Burundi',
   4907         'KH': 'Cambodia',
   4908         'CM': 'Cameroon',
   4909         'CA': 'Canada',
   4910         'CV': 'Cape Verde',
   4911         'KY': 'Cayman Islands',
   4912         'CF': 'Central African Republic',
   4913         'TD': 'Chad',
   4914         'CL': 'Chile',
   4915         'CN': 'China',
   4916         'CX': 'Christmas Island',
   4917         'CC': 'Cocos (Keeling) Islands',
   4918         'CO': 'Colombia',
   4919         'KM': 'Comoros',
   4920         'CG': 'Congo',
   4921         'CD': 'Congo, the Democratic Republic of the',
   4922         'CK': 'Cook Islands',
   4923         'CR': 'Costa Rica',
   4924         'CI': 'Côte d\'Ivoire',
   4925         'HR': 'Croatia',
   4926         'CU': 'Cuba',
   4927         'CW': 'Curaçao',
   4928         'CY': 'Cyprus',
   4929         'CZ': 'Czech Republic',
   4930         'DK': 'Denmark',
   4931         'DJ': 'Djibouti',
   4932         'DM': 'Dominica',
   4933         'DO': 'Dominican Republic',
   4934         'EC': 'Ecuador',
   4935         'EG': 'Egypt',
   4936         'SV': 'El Salvador',
   4937         'GQ': 'Equatorial Guinea',
   4938         'ER': 'Eritrea',
   4939         'EE': 'Estonia',
   4940         'ET': 'Ethiopia',
   4941         'FK': 'Falkland Islands (Malvinas)',
   4942         'FO': 'Faroe Islands',
   4943         'FJ': 'Fiji',
   4944         'FI': 'Finland',
   4945         'FR': 'France',
   4946         'GF': 'French Guiana',
   4947         'PF': 'French Polynesia',
   4948         'TF': 'French Southern Territories',
   4949         'GA': 'Gabon',
   4950         'GM': 'Gambia',
   4951         'GE': 'Georgia',
   4952         'DE': 'Germany',
   4953         'GH': 'Ghana',
   4954         'GI': 'Gibraltar',
   4955         'GR': 'Greece',
   4956         'GL': 'Greenland',
   4957         'GD': 'Grenada',
   4958         'GP': 'Guadeloupe',
   4959         'GU': 'Guam',
   4960         'GT': 'Guatemala',
   4961         'GG': 'Guernsey',
   4962         'GN': 'Guinea',
   4963         'GW': 'Guinea-Bissau',
   4964         'GY': 'Guyana',
   4965         'HT': 'Haiti',
   4966         'HM': 'Heard Island and McDonald Islands',
   4967         'VA': 'Holy See (Vatican City State)',
   4968         'HN': 'Honduras',
   4969         'HK': 'Hong Kong',
   4970         'HU': 'Hungary',
   4971         'IS': 'Iceland',
   4972         'IN': 'India',
   4973         'ID': 'Indonesia',
   4974         'IR': 'Iran, Islamic Republic of',
   4975         'IQ': 'Iraq',
   4976         'IE': 'Ireland',
   4977         'IM': 'Isle of Man',
   4978         'IL': 'Israel',
   4979         'IT': 'Italy',
   4980         'JM': 'Jamaica',
   4981         'JP': 'Japan',
   4982         'JE': 'Jersey',
   4983         'JO': 'Jordan',
   4984         'KZ': 'Kazakhstan',
   4985         'KE': 'Kenya',
   4986         'KI': 'Kiribati',
   4987         'KP': 'Korea, Democratic People\'s Republic of',
   4988         'KR': 'Korea, Republic of',
   4989         'KW': 'Kuwait',
   4990         'KG': 'Kyrgyzstan',
   4991         'LA': 'Lao People\'s Democratic Republic',
   4992         'LV': 'Latvia',
   4993         'LB': 'Lebanon',
   4994         'LS': 'Lesotho',
   4995         'LR': 'Liberia',
   4996         'LY': 'Libya',
   4997         'LI': 'Liechtenstein',
   4998         'LT': 'Lithuania',
   4999         'LU': 'Luxembourg',
   5000         'MO': 'Macao',
   5001         'MK': 'Macedonia, the Former Yugoslav Republic of',
   5002         'MG': 'Madagascar',
   5003         'MW': 'Malawi',
   5004         'MY': 'Malaysia',
   5005         'MV': 'Maldives',
   5006         'ML': 'Mali',
   5007         'MT': 'Malta',
   5008         'MH': 'Marshall Islands',
   5009         'MQ': 'Martinique',
   5010         'MR': 'Mauritania',
   5011         'MU': 'Mauritius',
   5012         'YT': 'Mayotte',
   5013         'MX': 'Mexico',
   5014         'FM': 'Micronesia, Federated States of',
   5015         'MD': 'Moldova, Republic of',
   5016         'MC': 'Monaco',
   5017         'MN': 'Mongolia',
   5018         'ME': 'Montenegro',
   5019         'MS': 'Montserrat',
   5020         'MA': 'Morocco',
   5021         'MZ': 'Mozambique',
   5022         'MM': 'Myanmar',
   5023         'NA': 'Namibia',
   5024         'NR': 'Nauru',
   5025         'NP': 'Nepal',
   5026         'NL': 'Netherlands',
   5027         'NC': 'New Caledonia',
   5028         'NZ': 'New Zealand',
   5029         'NI': 'Nicaragua',
   5030         'NE': 'Niger',
   5031         'NG': 'Nigeria',
   5032         'NU': 'Niue',
   5033         'NF': 'Norfolk Island',
   5034         'MP': 'Northern Mariana Islands',
   5035         'NO': 'Norway',
   5036         'OM': 'Oman',
   5037         'PK': 'Pakistan',
   5038         'PW': 'Palau',
   5039         'PS': 'Palestine, State of',
   5040         'PA': 'Panama',
   5041         'PG': 'Papua New Guinea',
   5042         'PY': 'Paraguay',
   5043         'PE': 'Peru',
   5044         'PH': 'Philippines',
   5045         'PN': 'Pitcairn',
   5046         'PL': 'Poland',
   5047         'PT': 'Portugal',
   5048         'PR': 'Puerto Rico',
   5049         'QA': 'Qatar',
   5050         'RE': 'Réunion',
   5051         'RO': 'Romania',
   5052         'RU': 'Russian Federation',
   5053         'RW': 'Rwanda',
   5054         'BL': 'Saint Barthélemy',
   5055         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
   5056         'KN': 'Saint Kitts and Nevis',
   5057         'LC': 'Saint Lucia',
   5058         'MF': 'Saint Martin (French part)',
   5059         'PM': 'Saint Pierre and Miquelon',
   5060         'VC': 'Saint Vincent and the Grenadines',
   5061         'WS': 'Samoa',
   5062         'SM': 'San Marino',
   5063         'ST': 'Sao Tome and Principe',
   5064         'SA': 'Saudi Arabia',
   5065         'SN': 'Senegal',
   5066         'RS': 'Serbia',
   5067         'SC': 'Seychelles',
   5068         'SL': 'Sierra Leone',
   5069         'SG': 'Singapore',
   5070         'SX': 'Sint Maarten (Dutch part)',
   5071         'SK': 'Slovakia',
   5072         'SI': 'Slovenia',
   5073         'SB': 'Solomon Islands',
   5074         'SO': 'Somalia',
   5075         'ZA': 'South Africa',
   5076         'GS': 'South Georgia and the South Sandwich Islands',
   5077         'SS': 'South Sudan',
   5078         'ES': 'Spain',
   5079         'LK': 'Sri Lanka',
   5080         'SD': 'Sudan',
   5081         'SR': 'Suriname',
   5082         'SJ': 'Svalbard and Jan Mayen',
   5083         'SZ': 'Swaziland',
   5084         'SE': 'Sweden',
   5085         'CH': 'Switzerland',
   5086         'SY': 'Syrian Arab Republic',
   5087         'TW': 'Taiwan, Province of China',
   5088         'TJ': 'Tajikistan',
   5089         'TZ': 'Tanzania, United Republic of',
   5090         'TH': 'Thailand',
   5091         'TL': 'Timor-Leste',
   5092         'TG': 'Togo',
   5093         'TK': 'Tokelau',
   5094         'TO': 'Tonga',
   5095         'TT': 'Trinidad and Tobago',
   5096         'TN': 'Tunisia',
   5097         'TR': 'Turkey',
   5098         'TM': 'Turkmenistan',
   5099         'TC': 'Turks and Caicos Islands',
   5100         'TV': 'Tuvalu',
   5101         'UG': 'Uganda',
   5102         'UA': 'Ukraine',
   5103         'AE': 'United Arab Emirates',
   5104         'GB': 'United Kingdom',
   5105         'US': 'United States',
   5106         'UM': 'United States Minor Outlying Islands',
   5107         'UY': 'Uruguay',
   5108         'UZ': 'Uzbekistan',
   5109         'VU': 'Vanuatu',
   5110         'VE': 'Venezuela, Bolivarian Republic of',
   5111         'VN': 'Viet Nam',
   5112         'VG': 'Virgin Islands, British',
   5113         'VI': 'Virgin Islands, U.S.',
   5114         'WF': 'Wallis and Futuna',
   5115         'EH': 'Western Sahara',
   5116         'YE': 'Yemen',
   5117         'ZM': 'Zambia',
   5118         'ZW': 'Zimbabwe',
   5119     }
   5120 
   5121     @classmethod
   5122     def short2full(cls, code):
   5123         """Convert an ISO 3166-2 country code to the corresponding full name"""
   5124         return cls._country_map.get(code.upper())
   5125 
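# Usage sketch (hedged):
#   >>> ISO3166Utils.short2full('de')
#   'Germany'
# Lookups are case-insensitive; unknown codes yield None.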
   5126 
   5127 class GeoUtils(object):
   5128     # Major IPv4 address blocks per country
   5129     _country_ip_map = {
   5130         'AD': '46.172.224.0/19',
   5131         'AE': '94.200.0.0/13',
   5132         'AF': '149.54.0.0/17',
   5133         'AG': '209.59.64.0/18',
   5134         'AI': '204.14.248.0/21',
   5135         'AL': '46.99.0.0/16',
   5136         'AM': '46.70.0.0/15',
   5137         'AO': '105.168.0.0/13',
   5138         'AP': '182.50.184.0/21',
   5139         'AQ': '23.154.160.0/24',
   5140         'AR': '181.0.0.0/12',
   5141         'AS': '202.70.112.0/20',
   5142         'AT': '77.116.0.0/14',
   5143         'AU': '1.128.0.0/11',
   5144         'AW': '181.41.0.0/18',
   5145         'AX': '185.217.4.0/22',
   5146         'AZ': '5.197.0.0/16',
   5147         'BA': '31.176.128.0/17',
   5148         'BB': '65.48.128.0/17',
   5149         'BD': '114.130.0.0/16',
   5150         'BE': '57.0.0.0/8',
   5151         'BF': '102.178.0.0/15',
   5152         'BG': '95.42.0.0/15',
   5153         'BH': '37.131.0.0/17',
   5154         'BI': '154.117.192.0/18',
   5155         'BJ': '137.255.0.0/16',
   5156         'BL': '185.212.72.0/23',
   5157         'BM': '196.12.64.0/18',
   5158         'BN': '156.31.0.0/16',
   5159         'BO': '161.56.0.0/16',
   5160         'BQ': '161.0.80.0/20',
   5161         'BR': '191.128.0.0/12',
   5162         'BS': '24.51.64.0/18',
   5163         'BT': '119.2.96.0/19',
   5164         'BW': '168.167.0.0/16',
   5165         'BY': '178.120.0.0/13',
   5166         'BZ': '179.42.192.0/18',
   5167         'CA': '99.224.0.0/11',
   5168         'CD': '41.243.0.0/16',
   5169         'CF': '197.242.176.0/21',
   5170         'CG': '160.113.0.0/16',
   5171         'CH': '85.0.0.0/13',
   5172         'CI': '102.136.0.0/14',
   5173         'CK': '202.65.32.0/19',
   5174         'CL': '152.172.0.0/14',
   5175         'CM': '102.244.0.0/14',
   5176         'CN': '36.128.0.0/10',
   5177         'CO': '181.240.0.0/12',
   5178         'CR': '201.192.0.0/12',
   5179         'CU': '152.206.0.0/15',
   5180         'CV': '165.90.96.0/19',
   5181         'CW': '190.88.128.0/17',
   5182         'CY': '31.153.0.0/16',
   5183         'CZ': '88.100.0.0/14',
   5184         'DE': '53.0.0.0/8',
   5185         'DJ': '197.241.0.0/17',
   5186         'DK': '87.48.0.0/12',
   5187         'DM': '192.243.48.0/20',
   5188         'DO': '152.166.0.0/15',
   5189         'DZ': '41.96.0.0/12',
   5190         'EC': '186.68.0.0/15',
   5191         'EE': '90.190.0.0/15',
   5192         'EG': '156.160.0.0/11',
   5193         'ER': '196.200.96.0/20',
   5194         'ES': '88.0.0.0/11',
   5195         'ET': '196.188.0.0/14',
   5196         'EU': '2.16.0.0/13',
   5197         'FI': '91.152.0.0/13',
   5198         'FJ': '144.120.0.0/16',
   5199         'FK': '80.73.208.0/21',
   5200         'FM': '119.252.112.0/20',
   5201         'FO': '88.85.32.0/19',
   5202         'FR': '90.0.0.0/9',
   5203         'GA': '41.158.0.0/15',
   5204         'GB': '25.0.0.0/8',
   5205         'GD': '74.122.88.0/21',
   5206         'GE': '31.146.0.0/16',
   5207         'GF': '161.22.64.0/18',
   5208         'GG': '62.68.160.0/19',
   5209         'GH': '154.160.0.0/12',
   5210         'GI': '95.164.0.0/16',
   5211         'GL': '88.83.0.0/19',
   5212         'GM': '160.182.0.0/15',
   5213         'GN': '197.149.192.0/18',
   5214         'GP': '104.250.0.0/19',
   5215         'GQ': '105.235.224.0/20',
   5216         'GR': '94.64.0.0/13',
   5217         'GT': '168.234.0.0/16',
   5218         'GU': '168.123.0.0/16',
   5219         'GW': '197.214.80.0/20',
   5220         'GY': '181.41.64.0/18',
   5221         'HK': '113.252.0.0/14',
   5222         'HN': '181.210.0.0/16',
   5223         'HR': '93.136.0.0/13',
   5224         'HT': '148.102.128.0/17',
   5225         'HU': '84.0.0.0/14',
   5226         'ID': '39.192.0.0/10',
   5227         'IE': '87.32.0.0/12',
   5228         'IL': '79.176.0.0/13',
   5229         'IM': '5.62.80.0/20',
   5230         'IN': '117.192.0.0/10',
   5231         'IO': '203.83.48.0/21',
   5232         'IQ': '37.236.0.0/14',
   5233         'IR': '2.176.0.0/12',
   5234         'IS': '82.221.0.0/16',
   5235         'IT': '79.0.0.0/10',
   5236         'JE': '87.244.64.0/18',
   5237         'JM': '72.27.0.0/17',
   5238         'JO': '176.29.0.0/16',
   5239         'JP': '133.0.0.0/8',
   5240         'KE': '105.48.0.0/12',
   5241         'KG': '158.181.128.0/17',
   5242         'KH': '36.37.128.0/17',
   5243         'KI': '103.25.140.0/22',
   5244         'KM': '197.255.224.0/20',
   5245         'KN': '198.167.192.0/19',
   5246         'KP': '175.45.176.0/22',
   5247         'KR': '175.192.0.0/10',
   5248         'KW': '37.36.0.0/14',
   5249         'KY': '64.96.0.0/15',
   5250         'KZ': '2.72.0.0/13',
   5251         'LA': '115.84.64.0/18',
   5252         'LB': '178.135.0.0/16',
   5253         'LC': '24.92.144.0/20',
   5254         'LI': '82.117.0.0/19',
   5255         'LK': '112.134.0.0/15',
   5256         'LR': '102.183.0.0/16',
   5257         'LS': '129.232.0.0/17',
   5258         'LT': '78.56.0.0/13',
   5259         'LU': '188.42.0.0/16',
   5260         'LV': '46.109.0.0/16',
   5261         'LY': '41.252.0.0/14',
   5262         'MA': '105.128.0.0/11',
   5263         'MC': '88.209.64.0/18',
   5264         'MD': '37.246.0.0/16',
   5265         'ME': '178.175.0.0/17',
   5266         'MF': '74.112.232.0/21',
   5267         'MG': '154.126.0.0/17',
   5268         'MH': '117.103.88.0/21',
   5269         'MK': '77.28.0.0/15',
   5270         'ML': '154.118.128.0/18',
   5271         'MM': '37.111.0.0/17',
   5272         'MN': '49.0.128.0/17',
   5273         'MO': '60.246.0.0/16',
   5274         'MP': '202.88.64.0/20',
   5275         'MQ': '109.203.224.0/19',
   5276         'MR': '41.188.64.0/18',
   5277         'MS': '208.90.112.0/22',
   5278         'MT': '46.11.0.0/16',
   5279         'MU': '105.16.0.0/12',
   5280         'MV': '27.114.128.0/18',
   5281         'MW': '102.70.0.0/15',
   5282         'MX': '187.192.0.0/11',
   5283         'MY': '175.136.0.0/13',
   5284         'MZ': '197.218.0.0/15',
   5285         'NA': '41.182.0.0/16',
   5286         'NC': '101.101.0.0/18',
   5287         'NE': '197.214.0.0/18',
   5288         'NF': '203.17.240.0/22',
   5289         'NG': '105.112.0.0/12',
   5290         'NI': '186.76.0.0/15',
   5291         'NL': '145.96.0.0/11',
   5292         'NO': '84.208.0.0/13',
   5293         'NP': '36.252.0.0/15',
   5294         'NR': '203.98.224.0/19',
   5295         'NU': '49.156.48.0/22',
   5296         'NZ': '49.224.0.0/14',
   5297         'OM': '5.36.0.0/15',
   5298         'PA': '186.72.0.0/15',
   5299         'PE': '186.160.0.0/14',
   5300         'PF': '123.50.64.0/18',
   5301         'PG': '124.240.192.0/19',
   5302         'PH': '49.144.0.0/13',
   5303         'PK': '39.32.0.0/11',
   5304         'PL': '83.0.0.0/11',
   5305         'PM': '70.36.0.0/20',
   5306         'PR': '66.50.0.0/16',
   5307         'PS': '188.161.0.0/16',
   5308         'PT': '85.240.0.0/13',
   5309         'PW': '202.124.224.0/20',
   5310         'PY': '181.120.0.0/14',
   5311         'QA': '37.210.0.0/15',
   5312         'RE': '102.35.0.0/16',
   5313         'RO': '79.112.0.0/13',
   5314         'RS': '93.86.0.0/15',
   5315         'RU': '5.136.0.0/13',
   5316         'RW': '41.186.0.0/16',
   5317         'SA': '188.48.0.0/13',
   5318         'SB': '202.1.160.0/19',
   5319         'SC': '154.192.0.0/11',
   5320         'SD': '102.120.0.0/13',
   5321         'SE': '78.64.0.0/12',
   5322         'SG': '8.128.0.0/10',
   5323         'SI': '188.196.0.0/14',
   5324         'SK': '78.98.0.0/15',
   5325         'SL': '102.143.0.0/17',
   5326         'SM': '89.186.32.0/19',
   5327         'SN': '41.82.0.0/15',
   5328         'SO': '154.115.192.0/18',
   5329         'SR': '186.179.128.0/17',
   5330         'SS': '105.235.208.0/21',
   5331         'ST': '197.159.160.0/19',
   5332         'SV': '168.243.0.0/16',
   5333         'SX': '190.102.0.0/20',
   5334         'SY': '5.0.0.0/16',
   5335         'SZ': '41.84.224.0/19',
   5336         'TC': '65.255.48.0/20',
   5337         'TD': '154.68.128.0/19',
   5338         'TG': '196.168.0.0/14',
   5339         'TH': '171.96.0.0/13',
   5340         'TJ': '85.9.128.0/18',
   5341         'TK': '27.96.24.0/21',
   5342         'TL': '180.189.160.0/20',
   5343         'TM': '95.85.96.0/19',
   5344         'TN': '197.0.0.0/11',
   5345         'TO': '175.176.144.0/21',
   5346         'TR': '78.160.0.0/11',
   5347         'TT': '186.44.0.0/15',
   5348         'TV': '202.2.96.0/19',
   5349         'TW': '120.96.0.0/11',
   5350         'TZ': '156.156.0.0/14',
   5351         'UA': '37.52.0.0/14',
   5352         'UG': '102.80.0.0/13',
   5353         'US': '6.0.0.0/8',
   5354         'UY': '167.56.0.0/13',
   5355         'UZ': '84.54.64.0/18',
   5356         'VA': '212.77.0.0/19',
   5357         'VC': '207.191.240.0/21',
   5358         'VE': '186.88.0.0/13',
   5359         'VG': '66.81.192.0/20',
   5360         'VI': '146.226.0.0/16',
   5361         'VN': '14.160.0.0/11',
   5362         'VU': '202.80.32.0/20',
   5363         'WF': '117.20.32.0/21',
   5364         'WS': '202.4.32.0/19',
   5365         'YE': '134.35.0.0/16',
   5366         'YT': '41.242.116.0/22',
   5367         'ZA': '41.0.0.0/11',
   5368         'ZM': '102.144.0.0/13',
   5369         'ZW': '102.177.192.0/18',
   5370     }
   5371 
   5372     @classmethod
   5373     def random_ipv4(cls, code_or_block):
   5374         if len(code_or_block) == 2:
   5375             block = cls._country_ip_map.get(code_or_block.upper())
   5376             if not block:
   5377                 return None
   5378         else:
   5379             block = code_or_block
   5380         addr, preflen = block.split('/')
   5381         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
   5382         addr_max = addr_min | (0xffffffff >> int(preflen))
   5383         return compat_str(socket.inet_ntoa(
   5384             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
   5385 
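# Usage sketch (hedged; results are random by design): random_ipv4() accepts
# either a two-letter country code or an explicit CIDR block, e.g.
#   GeoUtils.random_ipv4('DE')            # some address inside 53.0.0.0/8
#   GeoUtils.random_ipv4('192.0.2.0/24')  # some address inside that block
# This backs the geo-bypass feature, which fakes a plausible X-Forwarded-For
# address for geo-restricted sites.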
   5386 
   5387 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
   5388     def __init__(self, proxies=None):
   5389         # Set default handlers
   5390         for type in ('http', 'https'):
   5391             setattr(self, '%s_open' % type,
   5392                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
   5393                         meth(r, proxy, type))
   5394         compat_urllib_request.ProxyHandler.__init__(self, proxies)
   5395 
   5396     def proxy_open(self, req, proxy, type):
   5397         req_proxy = req.headers.get('Ytdl-request-proxy')
   5398         if req_proxy is not None:
   5399             proxy = req_proxy
   5400             del req.headers['Ytdl-request-proxy']
   5401 
   5402         if proxy == '__noproxy__':
   5403             return None  # No Proxy
   5404         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
   5405             req.add_header('Ytdl-socks-proxy', proxy)
   5406             # youtube-dl's http/https handlers take care of wrapping the socket with SOCKS
   5407             return None
   5408         return compat_urllib_request.ProxyHandler.proxy_open(
   5409             self, req, proxy, type)
   5410 
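# Usage sketch for PerRequestProxyHandler (hedged; proxy URLs are illustrative):
#   handler = PerRequestProxyHandler({'http': 'http://proxy.example:3128'})
#   opener = compat_urllib_request.build_opener(handler)
#   req = compat_urllib_request.Request('http://example.com/')
#   req.add_header('Ytdl-request-proxy', 'http://127.0.0.1:8118')
#   opener.open(req)  # uses the per-request proxy instead of the default one
# A 'Ytdl-request-proxy' value of '__noproxy__' disables proxying for that
# single request.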
   5411 
   5412 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
   5413 # released into the public domain
   5414 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
   5415 
   5416 def long_to_bytes(n, blocksize=0):
   5417     """long_to_bytes(n:long, blocksize:int) : string
   5418     Convert a long integer to a byte string.
   5419 
   5420     If optional blocksize is given and greater than zero, pad the front of the
   5421     byte string with binary zeros so that the length is a multiple of
   5422     blocksize.
   5423     """
   5424     # after much testing, this algorithm was deemed to be the fastest
   5425     s = b''
   5426     n = int(n)
   5427     while n > 0:
   5428         s = compat_struct_pack('>I', n & 0xffffffff) + s
   5429         n = n >> 32
   5430     # strip off leading zeros
   5431     for i in range(len(s)):
   5432         if s[i] != b'\000'[0]:
   5433             break
   5434     else:
   5435         # only happens when n == 0
   5436         s = b'\000'
   5437         i = 0
   5438     s = s[i:]
   5439     # add back some pad bytes.  this could be done more efficiently w.r.t. the
   5440     # de-padding being done above, but sigh...
   5441     if blocksize > 0 and len(s) % blocksize:
   5442         s = (blocksize - len(s) % blocksize) * b'\000' + s
   5443     return s
   5444 
   5445 
   5446 def bytes_to_long(s):
   5447     """bytes_to_long(string) : long
   5448     Convert a byte string to a long integer.
   5449 
   5450     This is (essentially) the inverse of long_to_bytes().
   5451     """
   5452     acc = 0
   5453     length = len(s)
   5454     if length % 4:
   5455         extra = (4 - length % 4)
   5456         s = b'\000' * extra + s
   5457         length = length + extra
   5458     for i in range(0, length, 4):
   5459         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
   5460     return acc
   5461 
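# Usage sketch (hedged): the two helpers above are inverses of each other.
#   >>> bytes_to_long(b'\x01\x00')
#   256
#   >>> long_to_bytes(256)
#   b'\x01\x00'
#   >>> long_to_bytes(256, blocksize=4)
#   b'\x00\x00\x01\x00'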
   5462 
   5463 def ohdave_rsa_encrypt(data, exponent, modulus):
   5464     '''
   5465     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
   5466 
   5467     Input:
   5468         data: data to encrypt, bytes-like object
   5469         exponent, modulus: parameter e and N of RSA algorithm, both integer
   5470     Output: hex string of encrypted data
   5471 
   5472     Limitation: supports one block encryption only
   5473     '''
   5474 
   5475     payload = int(binascii.hexlify(data[::-1]), 16)
   5476     encrypted = pow(payload, exponent, modulus)
   5477     return '%x' % encrypted
   5478 
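# Usage sketch (hedged, with deliberately tiny toy numbers -- real callers pass
# a site's public RSA exponent and modulus):
#   >>> ohdave_rsa_encrypt(b'\x02', 3, 101)
#   '8'
# i.e. the payload (bytes read little-endian) is 2, and 2 ** 3 mod 101 == 8,
# returned as a hex string.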
   5479 
   5480 def pkcs1pad(data, length):
   5481     """
   5482     Padding input data with PKCS#1 scheme
   5483     Pad input data with the PKCS#1 scheme
   5484     @param {int[]} data        input data
   5485     @param {int}   length      target length
   5486     @returns {int[]}           padded data
   5487     """
   5488     if len(data) > length - 11:
   5489         raise ValueError('Input data too long for PKCS#1 padding')
   5490 
   5491     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
   5492     return [0, 2] + pseudo_random + [0] + data
   5493 
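# Usage sketch (hedged): the padded block is always `length` ints long, laid
# out as 0x00 0x02, random filler, a 0x00 separator, then the original data.
#   padded = pkcs1pad([1, 2, 3], 16)
#   # len(padded) == 16, padded[:2] == [0, 2], padded[-4:] == [0, 1, 2, 3]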
   5494 
   5495 def encode_base_n(num, n, table=None):
   5496     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
   5497     if not table:
   5498         table = FULL_TABLE[:n]
   5499 
   5500     if n > len(table):
   5501         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
   5502 
   5503     if num == 0:
   5504         return table[0]
   5505 
   5506     ret = ''
   5507     while num:
   5508         ret = table[num % n] + ret
   5509         num = num // n
   5510     return ret
   5511 
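# Usage sketch (hedged):
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> encode_base_n(255, 62)
#   '47'
# With the default table, digits come first, then lowercase, then uppercase.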
   5512 
   5513 def decode_packed_codes(code):
   5514     mobj = re.search(PACKED_CODES_RE, code)
   5515     obfuscated_code, base, count, symbols = mobj.groups()
   5516     base = int(base)
   5517     count = int(count)
   5518     symbols = symbols.split('|')
   5519     symbol_table = {}
   5520 
   5521     while count:
   5522         count -= 1
   5523         base_n_count = encode_base_n(count, base)
   5524         symbol_table[base_n_count] = symbols[count] or base_n_count
   5525 
   5526     return re.sub(
   5527         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
   5528         obfuscated_code)
   5529 
   5530 
   5531 def caesar(s, alphabet, shift):
   5532     if shift == 0:
   5533         return s
   5534     l = len(alphabet)
   5535     return ''.join(
   5536         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
   5537         for c in s)
   5538 
   5539 
   5540 def rot47(s):
   5541     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
   5542 
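# Usage sketch (hedged): caesar() only rotates characters that appear in the
# given alphabet and leaves everything else alone; rot47() applies it to the
# full printable ASCII range.
#   >>> caesar('abc!', 'abcdefghijklmnopqrstuvwxyz', 1)
#   'bcd!'
#   >>> rot47('Hello')
#   'w6==@'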
   5543 
   5544 def parse_m3u8_attributes(attrib):
   5545     info = {}
   5546     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
   5547         if val.startswith('"'):
   5548             val = val[1:-1]
   5549         info[key] = val
   5550     return info
   5551 
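# Usage sketch (hedged):
#   >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.4d401f"')
#   {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.4d401f'}
# Quoted values keep any commas they contain; the surrounding quotes are dropped.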
   5552 
   5553 def urshift(val, n):
   5554     return val >> n if val >= 0 else (val + 0x100000000) >> n
   5555 
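# Usage sketch (hedged): urshift() mimics JavaScript's unsigned right shift
# (>>>) for 32-bit values.
#   >>> urshift(-1, 1)
#   2147483647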
   5556 
   5557 # Based on png2str() written by @gdkchan and improved by @yokrysty
   5558 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
   5559 def decode_png(png_data):
   5560     # Reference: https://www.w3.org/TR/PNG/
   5561     header = png_data[8:]
   5562 
   5563     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
   5564         raise IOError('Not a valid PNG file.')
   5565 
   5566     int_map = {1: '>B', 2: '>H', 4: '>I'}
   5567     unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
   5568 
   5569     chunks = []
   5570 
   5571     while header:
   5572         length = unpack_integer(header[:4])
   5573         header = header[4:]
   5574 
   5575         chunk_type = header[:4]
   5576         header = header[4:]
   5577 
   5578         chunk_data = header[:length]
   5579         header = header[length:]
   5580 
   5581         header = header[4:]  # Skip CRC
   5582 
   5583         chunks.append({
   5584             'type': chunk_type,
   5585             'length': length,
   5586             'data': chunk_data
   5587         })
   5588 
   5589     ihdr = chunks[0]['data']
   5590 
   5591     width = unpack_integer(ihdr[:4])
   5592     height = unpack_integer(ihdr[4:8])
   5593 
   5594     idat = b''
   5595 
   5596     for chunk in chunks:
   5597         if chunk['type'] == b'IDAT':
   5598             idat += chunk['data']
   5599 
   5600     if not idat:
   5601         raise IOError('Unable to read PNG data.')
   5602 
   5603     decompressed_data = bytearray(zlib.decompress(idat))
   5604 
   5605     stride = width * 3
   5606     pixels = []
   5607 
   5608     def _get_pixel(idx):
   5609         x = idx % stride
   5610         y = idx // stride
   5611         return pixels[y][x]
   5612 
   5613     for y in range(height):
   5614         basePos = y * (1 + stride)
   5615         filter_type = decompressed_data[basePos]
   5616 
   5617         current_row = []
   5618 
   5619         pixels.append(current_row)
   5620 
   5621         for x in range(stride):
   5622             color = decompressed_data[1 + basePos + x]
   5623             basex = y * stride + x
   5624             left = 0
   5625             up = 0
   5626 
   5627             if x > 2:
   5628                 left = _get_pixel(basex - 3)
   5629             if y > 0:
   5630                 up = _get_pixel(basex - stride)
   5631 
   5632             if filter_type == 1:  # Sub
   5633                 color = (color + left) & 0xff
   5634             elif filter_type == 2:  # Up
   5635                 color = (color + up) & 0xff
   5636             elif filter_type == 3:  # Average
   5637                 color = (color + ((left + up) >> 1)) & 0xff
   5638             elif filter_type == 4:  # Paeth
   5639                 a = left
   5640                 b = up
   5641                 c = 0
   5642 
   5643                 if x > 2 and y > 0:
   5644                     c = _get_pixel(basex - stride - 3)
   5645 
   5646                 p = a + b - c
   5647 
   5648                 pa = abs(p - a)
   5649                 pb = abs(p - b)
   5650                 pc = abs(p - c)
   5651 
   5652                 if pa <= pb and pa <= pc:
   5653                     color = (color + a) & 0xff
   5654                 elif pb <= pc:
   5655                     color = (color + b) & 0xff
   5656                 else:
   5657                     color = (color + c) & 0xff
   5658 
   5659             current_row.append(color)
   5660 
   5661     return width, height, pixels
   5662 
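# Usage sketch (hedged; the file name is only illustrative). The decoder above
# assumes 8-bit RGB data (three bytes per pixel) without interlacing:
#   width, height, pixels = decode_png(open('frame.png', 'rb').read())
#   red = pixels[y][3 * x]  # red component of the pixel at (x, y)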
   5663 
   5664 def write_xattr(path, key, value):
   5665     # This mess below finds the best xattr tool for the job
   5666     try:
   5667         # try the pyxattr module...
   5668         import xattr
   5669 
   5670         if hasattr(xattr, 'set'):  # pyxattr
   5671             # Unicode arguments are not supported in python-pyxattr until
   5672             # version 0.5.0
   5673             # See https://github.com/ytdl-org/youtube-dl/issues/5498
   5674             pyxattr_required_version = '0.5.0'
   5675             if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
   5676                 # TODO: fallback to CLI tools
   5677                 raise XAttrUnavailableError(
   5678                     'python-pyxattr is detected but is too old. '
   5679                     'youtube-dl requires %s or above while your version is %s. '
   5680                     'Falling back to other xattr implementations' % (
   5681                         pyxattr_required_version, xattr.__version__))
   5682 
   5683             setxattr = xattr.set
   5684         else:  # xattr
   5685             setxattr = xattr.setxattr
   5686 
   5687         try:
   5688             setxattr(path, key, value)
   5689         except EnvironmentError as e:
   5690             raise XAttrMetadataError(e.errno, e.strerror)
   5691 
   5692     except ImportError:
   5693         if compat_os_name == 'nt':
   5694             # Write xattrs to NTFS Alternate Data Streams:
   5695             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
   5696             assert ':' not in key
   5697             assert os.path.exists(path)
   5698 
   5699             ads_fn = path + ':' + key
   5700             try:
   5701                 with open(ads_fn, 'wb') as f:
   5702                     f.write(value)
   5703             except EnvironmentError as e:
   5704                 raise XAttrMetadataError(e.errno, e.strerror)
   5705         else:
   5706             user_has_setfattr = check_executable('setfattr', ['--version'])
   5707             user_has_xattr = check_executable('xattr', ['-h'])
   5708 
   5709             if user_has_setfattr or user_has_xattr:
   5710 
   5711                 value = value.decode('utf-8')
   5712                 if user_has_setfattr:
   5713                     executable = 'setfattr'
   5714                     opts = ['-n', key, '-v', value]
   5715                 elif user_has_xattr:
   5716                     executable = 'xattr'
   5717                     opts = ['-w', key, value]
   5718 
   5719                 cmd = ([encodeFilename(executable, True)]
   5720                        + [encodeArgument(o) for o in opts]
   5721                        + [encodeFilename(path, True)])
   5722 
   5723                 try:
   5724                     p = subprocess.Popen(
   5725                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
   5726                 except EnvironmentError as e:
   5727                     raise XAttrMetadataError(e.errno, e.strerror)
   5728                 stdout, stderr = p.communicate()
   5729                 stderr = stderr.decode('utf-8', 'replace')
   5730                 if p.returncode != 0:
   5731                     raise XAttrMetadataError(p.returncode, stderr)
   5732 
   5733             else:
   5734                 # On Unix, but neither pyxattr, setfattr, nor xattr could be found.
   5735                 if sys.platform.startswith('linux'):
   5736                     raise XAttrUnavailableError(
   5737                         "Couldn't find a tool to set the xattrs. "
   5738                         "Install either the python 'pyxattr' or 'xattr' "
   5739                         "modules, or the GNU 'attr' package "
   5740                         "(which contains the 'setfattr' tool).")
   5741                 else:
   5742                     raise XAttrUnavailableError(
   5743                         "Couldn't find a tool to set the xattrs. "
   5744                         "Install either the python 'xattr' module, "
   5745                         "or the 'xattr' binary.")
   5746 
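# Usage sketch (hedged; the path and key are only illustrative):
#   write_xattr('video.mp4', 'user.xdg.referrer.url', b'https://example.com/')
# The value must be bytes. Failures of the chosen backend raise
# XAttrMetadataError; if no backend (pyxattr, xattr, setfattr, NTFS ADS) is
# available, XAttrUnavailableError is raised instead.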
   5747 
   5748 def random_birthday(year_field, month_field, day_field):
   5749     start_date = datetime.date(1950, 1, 1)
   5750     end_date = datetime.date(1995, 12, 31)
   5751     offset = random.randint(0, (end_date - start_date).days)
   5752     random_date = start_date + datetime.timedelta(offset)
   5753     return {
   5754         year_field: str(random_date.year),
   5755         month_field: str(random_date.month),
   5756         day_field: str(random_date.day),
   5757     }
   5758 
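# Usage sketch (hedged; the result is random by design):
#   random_birthday('birth_year', 'birth_month', 'birth_day')
#   # e.g. {'birth_year': '1987', 'birth_month': '6', 'birth_day': '14'}
# Handy for filling age-gate forms with a plausible date of birth.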
   5759 
   5760 def clean_podcast_url(url):
   5761     return re.sub(r'''(?x)
   5762         (?:
   5763             (?:
   5764                 chtbl\.com/track|
   5765                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
   5766                 play\.podtrac\.com
   5767             )/[^/]+|
   5768             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
   5769             flex\.acast\.com|
   5770             pd(?:
   5771                 cn\.co| # https://podcorn.com/analytics-prefix/
   5772                 st\.fm # https://podsights.com/docs/
   5773             )/e
   5774         )/''', '', url)
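

# Usage sketch (hedged; everything after the tracking prefix is illustrative):
#   >>> clean_podcast_url('https://chtbl.com/track/XXXX/traffic.example.com/episode.mp3')
#   'https://traffic.example.com/episode.mp3'
# Only the known analytics/redirect prefixes are stripped; the direct media URL
# is left intact.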