utils.py (170851B)
1 #!/usr/bin/env python 2 # coding: utf-8 3 4 from __future__ import unicode_literals 5 6 import base64 7 import binascii 8 import calendar 9 import codecs 10 import collections 11 import contextlib 12 import ctypes 13 import datetime 14 import email.utils 15 import email.header 16 import errno 17 import functools 18 import gzip 19 import io 20 import itertools 21 import json 22 import locale 23 import math 24 import operator 25 import os 26 import platform 27 import random 28 import re 29 import socket 30 import ssl 31 import subprocess 32 import sys 33 import tempfile 34 import time 35 import traceback 36 import xml.etree.ElementTree 37 import zlib 38 39 from .compat import ( 40 compat_HTMLParseError, 41 compat_HTMLParser, 42 compat_HTTPError, 43 compat_basestring, 44 compat_chr, 45 compat_cookiejar, 46 compat_ctypes_WINFUNCTYPE, 47 compat_etree_fromstring, 48 compat_expanduser, 49 compat_html_entities, 50 compat_html_entities_html5, 51 compat_http_client, 52 compat_integer_types, 53 compat_kwargs, 54 compat_os_name, 55 compat_parse_qs, 56 compat_shlex_quote, 57 compat_str, 58 compat_struct_pack, 59 compat_struct_unpack, 60 compat_urllib_error, 61 compat_urllib_parse, 62 compat_urllib_parse_urlencode, 63 compat_urllib_parse_urlparse, 64 compat_urllib_parse_unquote_plus, 65 compat_urllib_request, 66 compat_urlparse, 67 compat_xpath, 68 ) 69 70 from .socks import ( 71 ProxyType, 72 sockssocket, 73 ) 74 75 76 def register_socks_protocols(): 77 # "Register" SOCKS protocols 78 # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 79 # URLs with protocols not in urlparse.uses_netloc are not handled correctly 80 for scheme in ('socks', 'socks4', 'socks4a', 'socks5'): 81 if scheme not in compat_urlparse.uses_netloc: 82 compat_urlparse.uses_netloc.append(scheme) 83 84 85 # This is not clearly defined otherwise 86 compiled_regex_type = type(re.compile('')) 87 88 89 def random_user_agent(): 90 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; 
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' 91 _CHROME_VERSIONS = ( 92 '74.0.3729.129', 93 '76.0.3780.3', 94 '76.0.3780.2', 95 '74.0.3729.128', 96 '76.0.3780.1', 97 '76.0.3780.0', 98 '75.0.3770.15', 99 '74.0.3729.127', 100 '74.0.3729.126', 101 '76.0.3779.1', 102 '76.0.3779.0', 103 '75.0.3770.14', 104 '74.0.3729.125', 105 '76.0.3778.1', 106 '76.0.3778.0', 107 '75.0.3770.13', 108 '74.0.3729.124', 109 '74.0.3729.123', 110 '73.0.3683.121', 111 '76.0.3777.1', 112 '76.0.3777.0', 113 '75.0.3770.12', 114 '74.0.3729.122', 115 '76.0.3776.4', 116 '75.0.3770.11', 117 '74.0.3729.121', 118 '76.0.3776.3', 119 '76.0.3776.2', 120 '73.0.3683.120', 121 '74.0.3729.120', 122 '74.0.3729.119', 123 '74.0.3729.118', 124 '76.0.3776.1', 125 '76.0.3776.0', 126 '76.0.3775.5', 127 '75.0.3770.10', 128 '74.0.3729.117', 129 '76.0.3775.4', 130 '76.0.3775.3', 131 '74.0.3729.116', 132 '75.0.3770.9', 133 '76.0.3775.2', 134 '76.0.3775.1', 135 '76.0.3775.0', 136 '75.0.3770.8', 137 '74.0.3729.115', 138 '74.0.3729.114', 139 '76.0.3774.1', 140 '76.0.3774.0', 141 '75.0.3770.7', 142 '74.0.3729.113', 143 '74.0.3729.112', 144 '74.0.3729.111', 145 '76.0.3773.1', 146 '76.0.3773.0', 147 '75.0.3770.6', 148 '74.0.3729.110', 149 '74.0.3729.109', 150 '76.0.3772.1', 151 '76.0.3772.0', 152 '75.0.3770.5', 153 '74.0.3729.108', 154 '74.0.3729.107', 155 '76.0.3771.1', 156 '76.0.3771.0', 157 '75.0.3770.4', 158 '74.0.3729.106', 159 '74.0.3729.105', 160 '75.0.3770.3', 161 '74.0.3729.104', 162 '74.0.3729.103', 163 '74.0.3729.102', 164 '75.0.3770.2', 165 '74.0.3729.101', 166 '75.0.3770.1', 167 '75.0.3770.0', 168 '74.0.3729.100', 169 '75.0.3769.5', 170 '75.0.3769.4', 171 '74.0.3729.99', 172 '75.0.3769.3', 173 '75.0.3769.2', 174 '75.0.3768.6', 175 '74.0.3729.98', 176 '75.0.3769.1', 177 '75.0.3769.0', 178 '74.0.3729.97', 179 '73.0.3683.119', 180 '73.0.3683.118', 181 '74.0.3729.96', 182 '75.0.3768.5', 183 '75.0.3768.4', 184 '75.0.3768.3', 185 '75.0.3768.2', 186 '74.0.3729.95', 187 '74.0.3729.94', 188 
'75.0.3768.1', 189 '75.0.3768.0', 190 '74.0.3729.93', 191 '74.0.3729.92', 192 '73.0.3683.117', 193 '74.0.3729.91', 194 '75.0.3766.3', 195 '74.0.3729.90', 196 '75.0.3767.2', 197 '75.0.3767.1', 198 '75.0.3767.0', 199 '74.0.3729.89', 200 '73.0.3683.116', 201 '75.0.3766.2', 202 '74.0.3729.88', 203 '75.0.3766.1', 204 '75.0.3766.0', 205 '74.0.3729.87', 206 '73.0.3683.115', 207 '74.0.3729.86', 208 '75.0.3765.1', 209 '75.0.3765.0', 210 '74.0.3729.85', 211 '73.0.3683.114', 212 '74.0.3729.84', 213 '75.0.3764.1', 214 '75.0.3764.0', 215 '74.0.3729.83', 216 '73.0.3683.113', 217 '75.0.3763.2', 218 '75.0.3761.4', 219 '74.0.3729.82', 220 '75.0.3763.1', 221 '75.0.3763.0', 222 '74.0.3729.81', 223 '73.0.3683.112', 224 '75.0.3762.1', 225 '75.0.3762.0', 226 '74.0.3729.80', 227 '75.0.3761.3', 228 '74.0.3729.79', 229 '73.0.3683.111', 230 '75.0.3761.2', 231 '74.0.3729.78', 232 '74.0.3729.77', 233 '75.0.3761.1', 234 '75.0.3761.0', 235 '73.0.3683.110', 236 '74.0.3729.76', 237 '74.0.3729.75', 238 '75.0.3760.0', 239 '74.0.3729.74', 240 '75.0.3759.8', 241 '75.0.3759.7', 242 '75.0.3759.6', 243 '74.0.3729.73', 244 '75.0.3759.5', 245 '74.0.3729.72', 246 '73.0.3683.109', 247 '75.0.3759.4', 248 '75.0.3759.3', 249 '74.0.3729.71', 250 '75.0.3759.2', 251 '74.0.3729.70', 252 '73.0.3683.108', 253 '74.0.3729.69', 254 '75.0.3759.1', 255 '75.0.3759.0', 256 '74.0.3729.68', 257 '73.0.3683.107', 258 '74.0.3729.67', 259 '75.0.3758.1', 260 '75.0.3758.0', 261 '74.0.3729.66', 262 '73.0.3683.106', 263 '74.0.3729.65', 264 '75.0.3757.1', 265 '75.0.3757.0', 266 '74.0.3729.64', 267 '73.0.3683.105', 268 '74.0.3729.63', 269 '75.0.3756.1', 270 '75.0.3756.0', 271 '74.0.3729.62', 272 '73.0.3683.104', 273 '75.0.3755.3', 274 '75.0.3755.2', 275 '73.0.3683.103', 276 '75.0.3755.1', 277 '75.0.3755.0', 278 '74.0.3729.61', 279 '73.0.3683.102', 280 '74.0.3729.60', 281 '75.0.3754.2', 282 '74.0.3729.59', 283 '75.0.3753.4', 284 '74.0.3729.58', 285 '75.0.3754.1', 286 '75.0.3754.0', 287 '74.0.3729.57', 288 '73.0.3683.101', 289 
'75.0.3753.3', 290 '75.0.3752.2', 291 '75.0.3753.2', 292 '74.0.3729.56', 293 '75.0.3753.1', 294 '75.0.3753.0', 295 '74.0.3729.55', 296 '73.0.3683.100', 297 '74.0.3729.54', 298 '75.0.3752.1', 299 '75.0.3752.0', 300 '74.0.3729.53', 301 '73.0.3683.99', 302 '74.0.3729.52', 303 '75.0.3751.1', 304 '75.0.3751.0', 305 '74.0.3729.51', 306 '73.0.3683.98', 307 '74.0.3729.50', 308 '75.0.3750.0', 309 '74.0.3729.49', 310 '74.0.3729.48', 311 '74.0.3729.47', 312 '75.0.3749.3', 313 '74.0.3729.46', 314 '73.0.3683.97', 315 '75.0.3749.2', 316 '74.0.3729.45', 317 '75.0.3749.1', 318 '75.0.3749.0', 319 '74.0.3729.44', 320 '73.0.3683.96', 321 '74.0.3729.43', 322 '74.0.3729.42', 323 '75.0.3748.1', 324 '75.0.3748.0', 325 '74.0.3729.41', 326 '75.0.3747.1', 327 '73.0.3683.95', 328 '75.0.3746.4', 329 '74.0.3729.40', 330 '74.0.3729.39', 331 '75.0.3747.0', 332 '75.0.3746.3', 333 '75.0.3746.2', 334 '74.0.3729.38', 335 '75.0.3746.1', 336 '75.0.3746.0', 337 '74.0.3729.37', 338 '73.0.3683.94', 339 '75.0.3745.5', 340 '75.0.3745.4', 341 '75.0.3745.3', 342 '75.0.3745.2', 343 '74.0.3729.36', 344 '75.0.3745.1', 345 '75.0.3745.0', 346 '75.0.3744.2', 347 '74.0.3729.35', 348 '73.0.3683.93', 349 '74.0.3729.34', 350 '75.0.3744.1', 351 '75.0.3744.0', 352 '74.0.3729.33', 353 '73.0.3683.92', 354 '74.0.3729.32', 355 '74.0.3729.31', 356 '73.0.3683.91', 357 '75.0.3741.2', 358 '75.0.3740.5', 359 '74.0.3729.30', 360 '75.0.3741.1', 361 '75.0.3741.0', 362 '74.0.3729.29', 363 '75.0.3740.4', 364 '73.0.3683.90', 365 '74.0.3729.28', 366 '75.0.3740.3', 367 '73.0.3683.89', 368 '75.0.3740.2', 369 '74.0.3729.27', 370 '75.0.3740.1', 371 '75.0.3740.0', 372 '74.0.3729.26', 373 '73.0.3683.88', 374 '73.0.3683.87', 375 '74.0.3729.25', 376 '75.0.3739.1', 377 '75.0.3739.0', 378 '73.0.3683.86', 379 '74.0.3729.24', 380 '73.0.3683.85', 381 '75.0.3738.4', 382 '75.0.3738.3', 383 '75.0.3738.2', 384 '75.0.3738.1', 385 '75.0.3738.0', 386 '74.0.3729.23', 387 '73.0.3683.84', 388 '74.0.3729.22', 389 '74.0.3729.21', 390 '75.0.3737.1', 391 
'75.0.3737.0', 392 '74.0.3729.20', 393 '73.0.3683.83', 394 '74.0.3729.19', 395 '75.0.3736.1', 396 '75.0.3736.0', 397 '74.0.3729.18', 398 '73.0.3683.82', 399 '74.0.3729.17', 400 '75.0.3735.1', 401 '75.0.3735.0', 402 '74.0.3729.16', 403 '73.0.3683.81', 404 '75.0.3734.1', 405 '75.0.3734.0', 406 '74.0.3729.15', 407 '73.0.3683.80', 408 '74.0.3729.14', 409 '75.0.3733.1', 410 '75.0.3733.0', 411 '75.0.3732.1', 412 '74.0.3729.13', 413 '74.0.3729.12', 414 '73.0.3683.79', 415 '74.0.3729.11', 416 '75.0.3732.0', 417 '74.0.3729.10', 418 '73.0.3683.78', 419 '74.0.3729.9', 420 '74.0.3729.8', 421 '74.0.3729.7', 422 '75.0.3731.3', 423 '75.0.3731.2', 424 '75.0.3731.0', 425 '74.0.3729.6', 426 '73.0.3683.77', 427 '73.0.3683.76', 428 '75.0.3730.5', 429 '75.0.3730.4', 430 '73.0.3683.75', 431 '74.0.3729.5', 432 '73.0.3683.74', 433 '75.0.3730.3', 434 '75.0.3730.2', 435 '74.0.3729.4', 436 '73.0.3683.73', 437 '73.0.3683.72', 438 '75.0.3730.1', 439 '75.0.3730.0', 440 '74.0.3729.3', 441 '73.0.3683.71', 442 '74.0.3729.2', 443 '73.0.3683.70', 444 '74.0.3729.1', 445 '74.0.3729.0', 446 '74.0.3726.4', 447 '73.0.3683.69', 448 '74.0.3726.3', 449 '74.0.3728.0', 450 '74.0.3726.2', 451 '73.0.3683.68', 452 '74.0.3726.1', 453 '74.0.3726.0', 454 '74.0.3725.4', 455 '73.0.3683.67', 456 '73.0.3683.66', 457 '74.0.3725.3', 458 '74.0.3725.2', 459 '74.0.3725.1', 460 '74.0.3724.8', 461 '74.0.3725.0', 462 '73.0.3683.65', 463 '74.0.3724.7', 464 '74.0.3724.6', 465 '74.0.3724.5', 466 '74.0.3724.4', 467 '74.0.3724.3', 468 '74.0.3724.2', 469 '74.0.3724.1', 470 '74.0.3724.0', 471 '73.0.3683.64', 472 '74.0.3723.1', 473 '74.0.3723.0', 474 '73.0.3683.63', 475 '74.0.3722.1', 476 '74.0.3722.0', 477 '73.0.3683.62', 478 '74.0.3718.9', 479 '74.0.3702.3', 480 '74.0.3721.3', 481 '74.0.3721.2', 482 '74.0.3721.1', 483 '74.0.3721.0', 484 '74.0.3720.6', 485 '73.0.3683.61', 486 '72.0.3626.122', 487 '73.0.3683.60', 488 '74.0.3720.5', 489 '72.0.3626.121', 490 '74.0.3718.8', 491 '74.0.3720.4', 492 '74.0.3720.3', 493 '74.0.3718.7', 494 
'74.0.3720.2', 495 '74.0.3720.1', 496 '74.0.3720.0', 497 '74.0.3718.6', 498 '74.0.3719.5', 499 '73.0.3683.59', 500 '74.0.3718.5', 501 '74.0.3718.4', 502 '74.0.3719.4', 503 '74.0.3719.3', 504 '74.0.3719.2', 505 '74.0.3719.1', 506 '73.0.3683.58', 507 '74.0.3719.0', 508 '73.0.3683.57', 509 '73.0.3683.56', 510 '74.0.3718.3', 511 '73.0.3683.55', 512 '74.0.3718.2', 513 '74.0.3718.1', 514 '74.0.3718.0', 515 '73.0.3683.54', 516 '74.0.3717.2', 517 '73.0.3683.53', 518 '74.0.3717.1', 519 '74.0.3717.0', 520 '73.0.3683.52', 521 '74.0.3716.1', 522 '74.0.3716.0', 523 '73.0.3683.51', 524 '74.0.3715.1', 525 '74.0.3715.0', 526 '73.0.3683.50', 527 '74.0.3711.2', 528 '74.0.3714.2', 529 '74.0.3713.3', 530 '74.0.3714.1', 531 '74.0.3714.0', 532 '73.0.3683.49', 533 '74.0.3713.1', 534 '74.0.3713.0', 535 '72.0.3626.120', 536 '73.0.3683.48', 537 '74.0.3712.2', 538 '74.0.3712.1', 539 '74.0.3712.0', 540 '73.0.3683.47', 541 '72.0.3626.119', 542 '73.0.3683.46', 543 '74.0.3710.2', 544 '72.0.3626.118', 545 '74.0.3711.1', 546 '74.0.3711.0', 547 '73.0.3683.45', 548 '72.0.3626.117', 549 '74.0.3710.1', 550 '74.0.3710.0', 551 '73.0.3683.44', 552 '72.0.3626.116', 553 '74.0.3709.1', 554 '74.0.3709.0', 555 '74.0.3704.9', 556 '73.0.3683.43', 557 '72.0.3626.115', 558 '74.0.3704.8', 559 '74.0.3704.7', 560 '74.0.3708.0', 561 '74.0.3706.7', 562 '74.0.3704.6', 563 '73.0.3683.42', 564 '72.0.3626.114', 565 '74.0.3706.6', 566 '72.0.3626.113', 567 '74.0.3704.5', 568 '74.0.3706.5', 569 '74.0.3706.4', 570 '74.0.3706.3', 571 '74.0.3706.2', 572 '74.0.3706.1', 573 '74.0.3706.0', 574 '73.0.3683.41', 575 '72.0.3626.112', 576 '74.0.3705.1', 577 '74.0.3705.0', 578 '73.0.3683.40', 579 '72.0.3626.111', 580 '73.0.3683.39', 581 '74.0.3704.4', 582 '73.0.3683.38', 583 '74.0.3704.3', 584 '74.0.3704.2', 585 '74.0.3704.1', 586 '74.0.3704.0', 587 '73.0.3683.37', 588 '72.0.3626.110', 589 '72.0.3626.109', 590 '74.0.3703.3', 591 '74.0.3703.2', 592 '73.0.3683.36', 593 '74.0.3703.1', 594 '74.0.3703.0', 595 '73.0.3683.35', 596 
'72.0.3626.108', 597 '74.0.3702.2', 598 '74.0.3699.3', 599 '74.0.3702.1', 600 '74.0.3702.0', 601 '73.0.3683.34', 602 '72.0.3626.107', 603 '73.0.3683.33', 604 '74.0.3701.1', 605 '74.0.3701.0', 606 '73.0.3683.32', 607 '73.0.3683.31', 608 '72.0.3626.105', 609 '74.0.3700.1', 610 '74.0.3700.0', 611 '73.0.3683.29', 612 '72.0.3626.103', 613 '74.0.3699.2', 614 '74.0.3699.1', 615 '74.0.3699.0', 616 '73.0.3683.28', 617 '72.0.3626.102', 618 '73.0.3683.27', 619 '73.0.3683.26', 620 '74.0.3698.0', 621 '74.0.3696.2', 622 '72.0.3626.101', 623 '73.0.3683.25', 624 '74.0.3696.1', 625 '74.0.3696.0', 626 '74.0.3694.8', 627 '72.0.3626.100', 628 '74.0.3694.7', 629 '74.0.3694.6', 630 '74.0.3694.5', 631 '74.0.3694.4', 632 '72.0.3626.99', 633 '72.0.3626.98', 634 '74.0.3694.3', 635 '73.0.3683.24', 636 '72.0.3626.97', 637 '72.0.3626.96', 638 '72.0.3626.95', 639 '73.0.3683.23', 640 '72.0.3626.94', 641 '73.0.3683.22', 642 '73.0.3683.21', 643 '72.0.3626.93', 644 '74.0.3694.2', 645 '72.0.3626.92', 646 '74.0.3694.1', 647 '74.0.3694.0', 648 '74.0.3693.6', 649 '73.0.3683.20', 650 '72.0.3626.91', 651 '74.0.3693.5', 652 '74.0.3693.4', 653 '74.0.3693.3', 654 '74.0.3693.2', 655 '73.0.3683.19', 656 '74.0.3693.1', 657 '74.0.3693.0', 658 '73.0.3683.18', 659 '72.0.3626.90', 660 '74.0.3692.1', 661 '74.0.3692.0', 662 '73.0.3683.17', 663 '72.0.3626.89', 664 '74.0.3687.3', 665 '74.0.3691.1', 666 '74.0.3691.0', 667 '73.0.3683.16', 668 '72.0.3626.88', 669 '72.0.3626.87', 670 '73.0.3683.15', 671 '74.0.3690.1', 672 '74.0.3690.0', 673 '73.0.3683.14', 674 '72.0.3626.86', 675 '73.0.3683.13', 676 '73.0.3683.12', 677 '74.0.3689.1', 678 '74.0.3689.0', 679 '73.0.3683.11', 680 '72.0.3626.85', 681 '73.0.3683.10', 682 '72.0.3626.84', 683 '73.0.3683.9', 684 '74.0.3688.1', 685 '74.0.3688.0', 686 '73.0.3683.8', 687 '72.0.3626.83', 688 '74.0.3687.2', 689 '74.0.3687.1', 690 '74.0.3687.0', 691 '73.0.3683.7', 692 '72.0.3626.82', 693 '74.0.3686.4', 694 '72.0.3626.81', 695 '74.0.3686.3', 696 '74.0.3686.2', 697 '74.0.3686.1', 698 
'74.0.3686.0', 699 '73.0.3683.6', 700 '72.0.3626.80', 701 '74.0.3685.1', 702 '74.0.3685.0', 703 '73.0.3683.5', 704 '72.0.3626.79', 705 '74.0.3684.1', 706 '74.0.3684.0', 707 '73.0.3683.4', 708 '72.0.3626.78', 709 '72.0.3626.77', 710 '73.0.3683.3', 711 '73.0.3683.2', 712 '72.0.3626.76', 713 '73.0.3683.1', 714 '73.0.3683.0', 715 '72.0.3626.75', 716 '71.0.3578.141', 717 '73.0.3682.1', 718 '73.0.3682.0', 719 '72.0.3626.74', 720 '71.0.3578.140', 721 '73.0.3681.4', 722 '73.0.3681.3', 723 '73.0.3681.2', 724 '73.0.3681.1', 725 '73.0.3681.0', 726 '72.0.3626.73', 727 '71.0.3578.139', 728 '72.0.3626.72', 729 '72.0.3626.71', 730 '73.0.3680.1', 731 '73.0.3680.0', 732 '72.0.3626.70', 733 '71.0.3578.138', 734 '73.0.3678.2', 735 '73.0.3679.1', 736 '73.0.3679.0', 737 '72.0.3626.69', 738 '71.0.3578.137', 739 '73.0.3678.1', 740 '73.0.3678.0', 741 '71.0.3578.136', 742 '73.0.3677.1', 743 '73.0.3677.0', 744 '72.0.3626.68', 745 '72.0.3626.67', 746 '71.0.3578.135', 747 '73.0.3676.1', 748 '73.0.3676.0', 749 '73.0.3674.2', 750 '72.0.3626.66', 751 '71.0.3578.134', 752 '73.0.3674.1', 753 '73.0.3674.0', 754 '72.0.3626.65', 755 '71.0.3578.133', 756 '73.0.3673.2', 757 '73.0.3673.1', 758 '73.0.3673.0', 759 '72.0.3626.64', 760 '71.0.3578.132', 761 '72.0.3626.63', 762 '72.0.3626.62', 763 '72.0.3626.61', 764 '72.0.3626.60', 765 '73.0.3672.1', 766 '73.0.3672.0', 767 '72.0.3626.59', 768 '71.0.3578.131', 769 '73.0.3671.3', 770 '73.0.3671.2', 771 '73.0.3671.1', 772 '73.0.3671.0', 773 '72.0.3626.58', 774 '71.0.3578.130', 775 '73.0.3670.1', 776 '73.0.3670.0', 777 '72.0.3626.57', 778 '71.0.3578.129', 779 '73.0.3669.1', 780 '73.0.3669.0', 781 '72.0.3626.56', 782 '71.0.3578.128', 783 '73.0.3668.2', 784 '73.0.3668.1', 785 '73.0.3668.0', 786 '72.0.3626.55', 787 '71.0.3578.127', 788 '73.0.3667.2', 789 '73.0.3667.1', 790 '73.0.3667.0', 791 '72.0.3626.54', 792 '71.0.3578.126', 793 '73.0.3666.1', 794 '73.0.3666.0', 795 '72.0.3626.53', 796 '71.0.3578.125', 797 '73.0.3665.4', 798 '73.0.3665.3', 799 '72.0.3626.52', 
800 '73.0.3665.2', 801 '73.0.3664.4', 802 '73.0.3665.1', 803 '73.0.3665.0', 804 '72.0.3626.51', 805 '71.0.3578.124', 806 '72.0.3626.50', 807 '73.0.3664.3', 808 '73.0.3664.2', 809 '73.0.3664.1', 810 '73.0.3664.0', 811 '73.0.3663.2', 812 '72.0.3626.49', 813 '71.0.3578.123', 814 '73.0.3663.1', 815 '73.0.3663.0', 816 '72.0.3626.48', 817 '71.0.3578.122', 818 '73.0.3662.1', 819 '73.0.3662.0', 820 '72.0.3626.47', 821 '71.0.3578.121', 822 '73.0.3661.1', 823 '72.0.3626.46', 824 '73.0.3661.0', 825 '72.0.3626.45', 826 '71.0.3578.120', 827 '73.0.3660.2', 828 '73.0.3660.1', 829 '73.0.3660.0', 830 '72.0.3626.44', 831 '71.0.3578.119', 832 '73.0.3659.1', 833 '73.0.3659.0', 834 '72.0.3626.43', 835 '71.0.3578.118', 836 '73.0.3658.1', 837 '73.0.3658.0', 838 '72.0.3626.42', 839 '71.0.3578.117', 840 '73.0.3657.1', 841 '73.0.3657.0', 842 '72.0.3626.41', 843 '71.0.3578.116', 844 '73.0.3656.1', 845 '73.0.3656.0', 846 '72.0.3626.40', 847 '71.0.3578.115', 848 '73.0.3655.1', 849 '73.0.3655.0', 850 '72.0.3626.39', 851 '71.0.3578.114', 852 '73.0.3654.1', 853 '73.0.3654.0', 854 '72.0.3626.38', 855 '71.0.3578.113', 856 '73.0.3653.1', 857 '73.0.3653.0', 858 '72.0.3626.37', 859 '71.0.3578.112', 860 '73.0.3652.1', 861 '73.0.3652.0', 862 '72.0.3626.36', 863 '71.0.3578.111', 864 '73.0.3651.1', 865 '73.0.3651.0', 866 '72.0.3626.35', 867 '71.0.3578.110', 868 '73.0.3650.1', 869 '73.0.3650.0', 870 '72.0.3626.34', 871 '71.0.3578.109', 872 '73.0.3649.1', 873 '73.0.3649.0', 874 '72.0.3626.33', 875 '71.0.3578.108', 876 '73.0.3648.2', 877 '73.0.3648.1', 878 '73.0.3648.0', 879 '72.0.3626.32', 880 '71.0.3578.107', 881 '73.0.3647.2', 882 '73.0.3647.1', 883 '73.0.3647.0', 884 '72.0.3626.31', 885 '71.0.3578.106', 886 '73.0.3635.3', 887 '73.0.3646.2', 888 '73.0.3646.1', 889 '73.0.3646.0', 890 '72.0.3626.30', 891 '71.0.3578.105', 892 '72.0.3626.29', 893 '73.0.3645.2', 894 '73.0.3645.1', 895 '73.0.3645.0', 896 '72.0.3626.28', 897 '71.0.3578.104', 898 '72.0.3626.27', 899 '72.0.3626.26', 900 '72.0.3626.25', 901 
'72.0.3626.24', 902 '73.0.3644.0', 903 '73.0.3643.2', 904 '72.0.3626.23', 905 '71.0.3578.103', 906 '73.0.3643.1', 907 '73.0.3643.0', 908 '72.0.3626.22', 909 '71.0.3578.102', 910 '73.0.3642.1', 911 '73.0.3642.0', 912 '72.0.3626.21', 913 '71.0.3578.101', 914 '73.0.3641.1', 915 '73.0.3641.0', 916 '72.0.3626.20', 917 '71.0.3578.100', 918 '72.0.3626.19', 919 '73.0.3640.1', 920 '73.0.3640.0', 921 '72.0.3626.18', 922 '73.0.3639.1', 923 '71.0.3578.99', 924 '73.0.3639.0', 925 '72.0.3626.17', 926 '73.0.3638.2', 927 '72.0.3626.16', 928 '73.0.3638.1', 929 '73.0.3638.0', 930 '72.0.3626.15', 931 '71.0.3578.98', 932 '73.0.3635.2', 933 '71.0.3578.97', 934 '73.0.3637.1', 935 '73.0.3637.0', 936 '72.0.3626.14', 937 '71.0.3578.96', 938 '71.0.3578.95', 939 '72.0.3626.13', 940 '71.0.3578.94', 941 '73.0.3636.2', 942 '71.0.3578.93', 943 '73.0.3636.1', 944 '73.0.3636.0', 945 '72.0.3626.12', 946 '71.0.3578.92', 947 '73.0.3635.1', 948 '73.0.3635.0', 949 '72.0.3626.11', 950 '71.0.3578.91', 951 '73.0.3634.2', 952 '73.0.3634.1', 953 '73.0.3634.0', 954 '72.0.3626.10', 955 '71.0.3578.90', 956 '71.0.3578.89', 957 '73.0.3633.2', 958 '73.0.3633.1', 959 '73.0.3633.0', 960 '72.0.3610.4', 961 '72.0.3626.9', 962 '71.0.3578.88', 963 '73.0.3632.5', 964 '73.0.3632.4', 965 '73.0.3632.3', 966 '73.0.3632.2', 967 '73.0.3632.1', 968 '73.0.3632.0', 969 '72.0.3626.8', 970 '71.0.3578.87', 971 '73.0.3631.2', 972 '73.0.3631.1', 973 '73.0.3631.0', 974 '72.0.3626.7', 975 '71.0.3578.86', 976 '72.0.3626.6', 977 '73.0.3630.1', 978 '73.0.3630.0', 979 '72.0.3626.5', 980 '71.0.3578.85', 981 '72.0.3626.4', 982 '73.0.3628.3', 983 '73.0.3628.2', 984 '73.0.3629.1', 985 '73.0.3629.0', 986 '72.0.3626.3', 987 '71.0.3578.84', 988 '73.0.3628.1', 989 '73.0.3628.0', 990 '71.0.3578.83', 991 '73.0.3627.1', 992 '73.0.3627.0', 993 '72.0.3626.2', 994 '71.0.3578.82', 995 '71.0.3578.81', 996 '71.0.3578.80', 997 '72.0.3626.1', 998 '72.0.3626.0', 999 '71.0.3578.79', 1000 '70.0.3538.124', 1001 '71.0.3578.78', 1002 '72.0.3623.4', 1003 
'72.0.3625.2', 1004 '72.0.3625.1', 1005 '72.0.3625.0', 1006 '71.0.3578.77', 1007 '70.0.3538.123', 1008 '72.0.3624.4', 1009 '72.0.3624.3', 1010 '72.0.3624.2', 1011 '71.0.3578.76', 1012 '72.0.3624.1', 1013 '72.0.3624.0', 1014 '72.0.3623.3', 1015 '71.0.3578.75', 1016 '70.0.3538.122', 1017 '71.0.3578.74', 1018 '72.0.3623.2', 1019 '72.0.3610.3', 1020 '72.0.3623.1', 1021 '72.0.3623.0', 1022 '72.0.3622.3', 1023 '72.0.3622.2', 1024 '71.0.3578.73', 1025 '70.0.3538.121', 1026 '72.0.3622.1', 1027 '72.0.3622.0', 1028 '71.0.3578.72', 1029 '70.0.3538.120', 1030 '72.0.3621.1', 1031 '72.0.3621.0', 1032 '71.0.3578.71', 1033 '70.0.3538.119', 1034 '72.0.3620.1', 1035 '72.0.3620.0', 1036 '71.0.3578.70', 1037 '70.0.3538.118', 1038 '71.0.3578.69', 1039 '72.0.3619.1', 1040 '72.0.3619.0', 1041 '71.0.3578.68', 1042 '70.0.3538.117', 1043 '71.0.3578.67', 1044 '72.0.3618.1', 1045 '72.0.3618.0', 1046 '71.0.3578.66', 1047 '70.0.3538.116', 1048 '72.0.3617.1', 1049 '72.0.3617.0', 1050 '71.0.3578.65', 1051 '70.0.3538.115', 1052 '72.0.3602.3', 1053 '71.0.3578.64', 1054 '72.0.3616.1', 1055 '72.0.3616.0', 1056 '71.0.3578.63', 1057 '70.0.3538.114', 1058 '71.0.3578.62', 1059 '72.0.3615.1', 1060 '72.0.3615.0', 1061 '71.0.3578.61', 1062 '70.0.3538.113', 1063 '72.0.3614.1', 1064 '72.0.3614.0', 1065 '71.0.3578.60', 1066 '70.0.3538.112', 1067 '72.0.3613.1', 1068 '72.0.3613.0', 1069 '71.0.3578.59', 1070 '70.0.3538.111', 1071 '72.0.3612.2', 1072 '72.0.3612.1', 1073 '72.0.3612.0', 1074 '70.0.3538.110', 1075 '71.0.3578.58', 1076 '70.0.3538.109', 1077 '72.0.3611.2', 1078 '72.0.3611.1', 1079 '72.0.3611.0', 1080 '71.0.3578.57', 1081 '70.0.3538.108', 1082 '72.0.3610.2', 1083 '71.0.3578.56', 1084 '71.0.3578.55', 1085 '72.0.3610.1', 1086 '72.0.3610.0', 1087 '71.0.3578.54', 1088 '70.0.3538.107', 1089 '71.0.3578.53', 1090 '72.0.3609.3', 1091 '71.0.3578.52', 1092 '72.0.3609.2', 1093 '71.0.3578.51', 1094 '72.0.3608.5', 1095 '72.0.3609.1', 1096 '72.0.3609.0', 1097 '71.0.3578.50', 1098 '70.0.3538.106', 1099 '72.0.3608.4', 
1100 '72.0.3608.3', 1101 '72.0.3608.2', 1102 '71.0.3578.49', 1103 '72.0.3608.1', 1104 '72.0.3608.0', 1105 '70.0.3538.105', 1106 '71.0.3578.48', 1107 '72.0.3607.1', 1108 '72.0.3607.0', 1109 '71.0.3578.47', 1110 '70.0.3538.104', 1111 '72.0.3606.2', 1112 '72.0.3606.1', 1113 '72.0.3606.0', 1114 '71.0.3578.46', 1115 '70.0.3538.103', 1116 '70.0.3538.102', 1117 '72.0.3605.3', 1118 '72.0.3605.2', 1119 '72.0.3605.1', 1120 '72.0.3605.0', 1121 '71.0.3578.45', 1122 '70.0.3538.101', 1123 '71.0.3578.44', 1124 '71.0.3578.43', 1125 '70.0.3538.100', 1126 '70.0.3538.99', 1127 '71.0.3578.42', 1128 '72.0.3604.1', 1129 '72.0.3604.0', 1130 '71.0.3578.41', 1131 '70.0.3538.98', 1132 '71.0.3578.40', 1133 '72.0.3603.2', 1134 '72.0.3603.1', 1135 '72.0.3603.0', 1136 '71.0.3578.39', 1137 '70.0.3538.97', 1138 '72.0.3602.2', 1139 '71.0.3578.38', 1140 '71.0.3578.37', 1141 '72.0.3602.1', 1142 '72.0.3602.0', 1143 '71.0.3578.36', 1144 '70.0.3538.96', 1145 '72.0.3601.1', 1146 '72.0.3601.0', 1147 '71.0.3578.35', 1148 '70.0.3538.95', 1149 '72.0.3600.1', 1150 '72.0.3600.0', 1151 '71.0.3578.34', 1152 '70.0.3538.94', 1153 '72.0.3599.3', 1154 '72.0.3599.2', 1155 '72.0.3599.1', 1156 '72.0.3599.0', 1157 '71.0.3578.33', 1158 '70.0.3538.93', 1159 '72.0.3598.1', 1160 '72.0.3598.0', 1161 '71.0.3578.32', 1162 '70.0.3538.87', 1163 '72.0.3597.1', 1164 '72.0.3597.0', 1165 '72.0.3596.2', 1166 '71.0.3578.31', 1167 '70.0.3538.86', 1168 '71.0.3578.30', 1169 '71.0.3578.29', 1170 '72.0.3596.1', 1171 '72.0.3596.0', 1172 '71.0.3578.28', 1173 '70.0.3538.85', 1174 '72.0.3595.2', 1175 '72.0.3591.3', 1176 '72.0.3595.1', 1177 '72.0.3595.0', 1178 '71.0.3578.27', 1179 '70.0.3538.84', 1180 '72.0.3594.1', 1181 '72.0.3594.0', 1182 '71.0.3578.26', 1183 '70.0.3538.83', 1184 '72.0.3593.2', 1185 '72.0.3593.1', 1186 '72.0.3593.0', 1187 '71.0.3578.25', 1188 '70.0.3538.82', 1189 '72.0.3589.3', 1190 '72.0.3592.2', 1191 '72.0.3592.1', 1192 '72.0.3592.0', 1193 '71.0.3578.24', 1194 '72.0.3589.2', 1195 '70.0.3538.81', 1196 '70.0.3538.80', 1197 
'72.0.3591.2', 1198 '72.0.3591.1', 1199 '72.0.3591.0', 1200 '71.0.3578.23', 1201 '70.0.3538.79', 1202 '71.0.3578.22', 1203 '72.0.3590.1', 1204 '72.0.3590.0', 1205 '71.0.3578.21', 1206 '70.0.3538.78', 1207 '70.0.3538.77', 1208 '72.0.3589.1', 1209 '72.0.3589.0', 1210 '71.0.3578.20', 1211 '70.0.3538.76', 1212 '71.0.3578.19', 1213 '70.0.3538.75', 1214 '72.0.3588.1', 1215 '72.0.3588.0', 1216 '71.0.3578.18', 1217 '70.0.3538.74', 1218 '72.0.3586.2', 1219 '72.0.3587.0', 1220 '71.0.3578.17', 1221 '70.0.3538.73', 1222 '72.0.3586.1', 1223 '72.0.3586.0', 1224 '71.0.3578.16', 1225 '70.0.3538.72', 1226 '72.0.3585.1', 1227 '72.0.3585.0', 1228 '71.0.3578.15', 1229 '70.0.3538.71', 1230 '71.0.3578.14', 1231 '72.0.3584.1', 1232 '72.0.3584.0', 1233 '71.0.3578.13', 1234 '70.0.3538.70', 1235 '72.0.3583.2', 1236 '71.0.3578.12', 1237 '72.0.3583.1', 1238 '72.0.3583.0', 1239 '71.0.3578.11', 1240 '70.0.3538.69', 1241 '71.0.3578.10', 1242 '72.0.3582.0', 1243 '72.0.3581.4', 1244 '71.0.3578.9', 1245 '70.0.3538.67', 1246 '72.0.3581.3', 1247 '72.0.3581.2', 1248 '72.0.3581.1', 1249 '72.0.3581.0', 1250 '71.0.3578.8', 1251 '70.0.3538.66', 1252 '72.0.3580.1', 1253 '72.0.3580.0', 1254 '71.0.3578.7', 1255 '70.0.3538.65', 1256 '71.0.3578.6', 1257 '72.0.3579.1', 1258 '72.0.3579.0', 1259 '71.0.3578.5', 1260 '70.0.3538.64', 1261 '71.0.3578.4', 1262 '71.0.3578.3', 1263 '71.0.3578.2', 1264 '71.0.3578.1', 1265 '71.0.3578.0', 1266 '70.0.3538.63', 1267 '69.0.3497.128', 1268 '70.0.3538.62', 1269 '70.0.3538.61', 1270 '70.0.3538.60', 1271 '70.0.3538.59', 1272 '71.0.3577.1', 1273 '71.0.3577.0', 1274 '70.0.3538.58', 1275 '69.0.3497.127', 1276 '71.0.3576.2', 1277 '71.0.3576.1', 1278 '71.0.3576.0', 1279 '70.0.3538.57', 1280 '70.0.3538.56', 1281 '71.0.3575.2', 1282 '70.0.3538.55', 1283 '69.0.3497.126', 1284 '70.0.3538.54', 1285 '71.0.3575.1', 1286 '71.0.3575.0', 1287 '71.0.3574.1', 1288 '71.0.3574.0', 1289 '70.0.3538.53', 1290 '69.0.3497.125', 1291 '70.0.3538.52', 1292 '71.0.3573.1', 1293 '71.0.3573.0', 1294 
'70.0.3538.51', 1295 '69.0.3497.124', 1296 '71.0.3572.1', 1297 '71.0.3572.0', 1298 '70.0.3538.50', 1299 '69.0.3497.123', 1300 '71.0.3571.2', 1301 '70.0.3538.49', 1302 '69.0.3497.122', 1303 '71.0.3571.1', 1304 '71.0.3571.0', 1305 '70.0.3538.48', 1306 '69.0.3497.121', 1307 '71.0.3570.1', 1308 '71.0.3570.0', 1309 '70.0.3538.47', 1310 '69.0.3497.120', 1311 '71.0.3568.2', 1312 '71.0.3569.1', 1313 '71.0.3569.0', 1314 '70.0.3538.46', 1315 '69.0.3497.119', 1316 '70.0.3538.45', 1317 '71.0.3568.1', 1318 '71.0.3568.0', 1319 '70.0.3538.44', 1320 '69.0.3497.118', 1321 '70.0.3538.43', 1322 '70.0.3538.42', 1323 '71.0.3567.1', 1324 '71.0.3567.0', 1325 '70.0.3538.41', 1326 '69.0.3497.117', 1327 '71.0.3566.1', 1328 '71.0.3566.0', 1329 '70.0.3538.40', 1330 '69.0.3497.116', 1331 '71.0.3565.1', 1332 '71.0.3565.0', 1333 '70.0.3538.39', 1334 '69.0.3497.115', 1335 '71.0.3564.1', 1336 '71.0.3564.0', 1337 '70.0.3538.38', 1338 '69.0.3497.114', 1339 '71.0.3563.0', 1340 '71.0.3562.2', 1341 '70.0.3538.37', 1342 '69.0.3497.113', 1343 '70.0.3538.36', 1344 '70.0.3538.35', 1345 '71.0.3562.1', 1346 '71.0.3562.0', 1347 '70.0.3538.34', 1348 '69.0.3497.112', 1349 '70.0.3538.33', 1350 '71.0.3561.1', 1351 '71.0.3561.0', 1352 '70.0.3538.32', 1353 '69.0.3497.111', 1354 '71.0.3559.6', 1355 '71.0.3560.1', 1356 '71.0.3560.0', 1357 '71.0.3559.5', 1358 '71.0.3559.4', 1359 '70.0.3538.31', 1360 '69.0.3497.110', 1361 '71.0.3559.3', 1362 '70.0.3538.30', 1363 '69.0.3497.109', 1364 '71.0.3559.2', 1365 '71.0.3559.1', 1366 '71.0.3559.0', 1367 '70.0.3538.29', 1368 '69.0.3497.108', 1369 '71.0.3558.2', 1370 '71.0.3558.1', 1371 '71.0.3558.0', 1372 '70.0.3538.28', 1373 '69.0.3497.107', 1374 '71.0.3557.2', 1375 '71.0.3557.1', 1376 '71.0.3557.0', 1377 '70.0.3538.27', 1378 '69.0.3497.106', 1379 '71.0.3554.4', 1380 '70.0.3538.26', 1381 '71.0.3556.1', 1382 '71.0.3556.0', 1383 '70.0.3538.25', 1384 '71.0.3554.3', 1385 '69.0.3497.105', 1386 '71.0.3554.2', 1387 '70.0.3538.24', 1388 '69.0.3497.104', 1389 '71.0.3555.2', 1390 
'70.0.3538.23', 1391 '71.0.3555.1', 1392 '71.0.3555.0', 1393 '70.0.3538.22', 1394 '69.0.3497.103', 1395 '71.0.3554.1', 1396 '71.0.3554.0', 1397 '70.0.3538.21', 1398 '69.0.3497.102', 1399 '71.0.3553.3', 1400 '70.0.3538.20', 1401 '69.0.3497.101', 1402 '71.0.3553.2', 1403 '69.0.3497.100', 1404 '71.0.3553.1', 1405 '71.0.3553.0', 1406 '70.0.3538.19', 1407 '69.0.3497.99', 1408 '69.0.3497.98', 1409 '69.0.3497.97', 1410 '71.0.3552.6', 1411 '71.0.3552.5', 1412 '71.0.3552.4', 1413 '71.0.3552.3', 1414 '71.0.3552.2', 1415 '71.0.3552.1', 1416 '71.0.3552.0', 1417 '70.0.3538.18', 1418 '69.0.3497.96', 1419 '71.0.3551.3', 1420 '71.0.3551.2', 1421 '71.0.3551.1', 1422 '71.0.3551.0', 1423 '70.0.3538.17', 1424 '69.0.3497.95', 1425 '71.0.3550.3', 1426 '71.0.3550.2', 1427 '71.0.3550.1', 1428 '71.0.3550.0', 1429 '70.0.3538.16', 1430 '69.0.3497.94', 1431 '71.0.3549.1', 1432 '71.0.3549.0', 1433 '70.0.3538.15', 1434 '69.0.3497.93', 1435 '69.0.3497.92', 1436 '71.0.3548.1', 1437 '71.0.3548.0', 1438 '70.0.3538.14', 1439 '69.0.3497.91', 1440 '71.0.3547.1', 1441 '71.0.3547.0', 1442 '70.0.3538.13', 1443 '69.0.3497.90', 1444 '71.0.3546.2', 1445 '69.0.3497.89', 1446 '71.0.3546.1', 1447 '71.0.3546.0', 1448 '70.0.3538.12', 1449 '69.0.3497.88', 1450 '71.0.3545.4', 1451 '71.0.3545.3', 1452 '71.0.3545.2', 1453 '71.0.3545.1', 1454 '71.0.3545.0', 1455 '70.0.3538.11', 1456 '69.0.3497.87', 1457 '71.0.3544.5', 1458 '71.0.3544.4', 1459 '71.0.3544.3', 1460 '71.0.3544.2', 1461 '71.0.3544.1', 1462 '71.0.3544.0', 1463 '69.0.3497.86', 1464 '70.0.3538.10', 1465 '69.0.3497.85', 1466 '70.0.3538.9', 1467 '69.0.3497.84', 1468 '71.0.3543.4', 1469 '70.0.3538.8', 1470 '71.0.3543.3', 1471 '71.0.3543.2', 1472 '71.0.3543.1', 1473 '71.0.3543.0', 1474 '70.0.3538.7', 1475 '69.0.3497.83', 1476 '71.0.3542.2', 1477 '71.0.3542.1', 1478 '71.0.3542.0', 1479 '70.0.3538.6', 1480 '69.0.3497.82', 1481 '69.0.3497.81', 1482 '71.0.3541.1', 1483 '71.0.3541.0', 1484 '70.0.3538.5', 1485 '69.0.3497.80', 1486 '71.0.3540.1', 1487 '71.0.3540.0', 
1488 '70.0.3538.4', 1489 '69.0.3497.79', 1490 '70.0.3538.3', 1491 '71.0.3539.1', 1492 '71.0.3539.0', 1493 '69.0.3497.78', 1494 '68.0.3440.134', 1495 '69.0.3497.77', 1496 '70.0.3538.2', 1497 '70.0.3538.1', 1498 '70.0.3538.0', 1499 '69.0.3497.76', 1500 '68.0.3440.133', 1501 '69.0.3497.75', 1502 '70.0.3537.2', 1503 '70.0.3537.1', 1504 '70.0.3537.0', 1505 '69.0.3497.74', 1506 '68.0.3440.132', 1507 '70.0.3536.0', 1508 '70.0.3535.5', 1509 '70.0.3535.4', 1510 '70.0.3535.3', 1511 '69.0.3497.73', 1512 '68.0.3440.131', 1513 '70.0.3532.8', 1514 '70.0.3532.7', 1515 '69.0.3497.72', 1516 '69.0.3497.71', 1517 '70.0.3535.2', 1518 '70.0.3535.1', 1519 '70.0.3535.0', 1520 '69.0.3497.70', 1521 '68.0.3440.130', 1522 '69.0.3497.69', 1523 '68.0.3440.129', 1524 '70.0.3534.4', 1525 '70.0.3534.3', 1526 '70.0.3534.2', 1527 '70.0.3534.1', 1528 '70.0.3534.0', 1529 '69.0.3497.68', 1530 '68.0.3440.128', 1531 '70.0.3533.2', 1532 '70.0.3533.1', 1533 '70.0.3533.0', 1534 '69.0.3497.67', 1535 '68.0.3440.127', 1536 '70.0.3532.6', 1537 '70.0.3532.5', 1538 '70.0.3532.4', 1539 '69.0.3497.66', 1540 '68.0.3440.126', 1541 '70.0.3532.3', 1542 '70.0.3532.2', 1543 '70.0.3532.1', 1544 '69.0.3497.60', 1545 '69.0.3497.65', 1546 '69.0.3497.64', 1547 '70.0.3532.0', 1548 '70.0.3531.0', 1549 '70.0.3530.4', 1550 '70.0.3530.3', 1551 '70.0.3530.2', 1552 '69.0.3497.58', 1553 '68.0.3440.125', 1554 '69.0.3497.57', 1555 '69.0.3497.56', 1556 '69.0.3497.55', 1557 '69.0.3497.54', 1558 '70.0.3530.1', 1559 '70.0.3530.0', 1560 '69.0.3497.53', 1561 '68.0.3440.124', 1562 '69.0.3497.52', 1563 '70.0.3529.3', 1564 '70.0.3529.2', 1565 '70.0.3529.1', 1566 '70.0.3529.0', 1567 '69.0.3497.51', 1568 '70.0.3528.4', 1569 '68.0.3440.123', 1570 '70.0.3528.3', 1571 '70.0.3528.2', 1572 '70.0.3528.1', 1573 '70.0.3528.0', 1574 '69.0.3497.50', 1575 '68.0.3440.122', 1576 '70.0.3527.1', 1577 '70.0.3527.0', 1578 '69.0.3497.49', 1579 '68.0.3440.121', 1580 '70.0.3526.1', 1581 '70.0.3526.0', 1582 '68.0.3440.120', 1583 '69.0.3497.48', 1584 '69.0.3497.47', 
1585 '68.0.3440.119', 1586 '68.0.3440.118', 1587 '70.0.3525.5', 1588 '70.0.3525.4', 1589 '70.0.3525.3', 1590 '68.0.3440.117', 1591 '69.0.3497.46', 1592 '70.0.3525.2', 1593 '70.0.3525.1', 1594 '70.0.3525.0', 1595 '69.0.3497.45', 1596 '68.0.3440.116', 1597 '70.0.3524.4', 1598 '70.0.3524.3', 1599 '69.0.3497.44', 1600 '70.0.3524.2', 1601 '70.0.3524.1', 1602 '70.0.3524.0', 1603 '70.0.3523.2', 1604 '69.0.3497.43', 1605 '68.0.3440.115', 1606 '70.0.3505.9', 1607 '69.0.3497.42', 1608 '70.0.3505.8', 1609 '70.0.3523.1', 1610 '70.0.3523.0', 1611 '69.0.3497.41', 1612 '68.0.3440.114', 1613 '70.0.3505.7', 1614 '69.0.3497.40', 1615 '70.0.3522.1', 1616 '70.0.3522.0', 1617 '70.0.3521.2', 1618 '69.0.3497.39', 1619 '68.0.3440.113', 1620 '70.0.3505.6', 1621 '70.0.3521.1', 1622 '70.0.3521.0', 1623 '69.0.3497.38', 1624 '68.0.3440.112', 1625 '70.0.3520.1', 1626 '70.0.3520.0', 1627 '69.0.3497.37', 1628 '68.0.3440.111', 1629 '70.0.3519.3', 1630 '70.0.3519.2', 1631 '70.0.3519.1', 1632 '70.0.3519.0', 1633 '69.0.3497.36', 1634 '68.0.3440.110', 1635 '70.0.3518.1', 1636 '70.0.3518.0', 1637 '69.0.3497.35', 1638 '69.0.3497.34', 1639 '68.0.3440.109', 1640 '70.0.3517.1', 1641 '70.0.3517.0', 1642 '69.0.3497.33', 1643 '68.0.3440.108', 1644 '69.0.3497.32', 1645 '70.0.3516.3', 1646 '70.0.3516.2', 1647 '70.0.3516.1', 1648 '70.0.3516.0', 1649 '69.0.3497.31', 1650 '68.0.3440.107', 1651 '70.0.3515.4', 1652 '68.0.3440.106', 1653 '70.0.3515.3', 1654 '70.0.3515.2', 1655 '70.0.3515.1', 1656 '70.0.3515.0', 1657 '69.0.3497.30', 1658 '68.0.3440.105', 1659 '68.0.3440.104', 1660 '70.0.3514.2', 1661 '70.0.3514.1', 1662 '70.0.3514.0', 1663 '69.0.3497.29', 1664 '68.0.3440.103', 1665 '70.0.3513.1', 1666 '70.0.3513.0', 1667 '69.0.3497.28', 1668 ) 1669 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) 1670 1671 1672 std_headers = { 1673 'User-Agent': random_user_agent(), 1674 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 1675 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 1676 
# Default headers attached to every HTTP request (see YoutubeDLHandler.http_request).
# The User-Agent is picked once per process by random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative User-Agent strings that callers can request explicitly.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Sentinel distinguishing "caller passed no default" from "default is None".
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code, for parsing localized textual dates.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Media file extensions recognized by the format-guessing helpers.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

# strptime() patterns tried in order when parsing dates/timestamps.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Extra patterns for locales that write the day before the month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Extra patterns for locales that write the month before the day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the trailing argument list of "packed" (obfuscated) JavaScript:
# }('payload', radix, count, 'words'.split('|')
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts the body of <script type="application/ld+json"> metadata blocks.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        # Make sure the reported encoding actually works before trusting it
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Create the temp file next to the target so os.rename stays on one
    # filesystem (keeps the final rename atomic where the OS supports it)
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates a private (0600) file; widen it to
            # the process' umask-derived default so the result looks like a
            # normally-created file
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best effort: do not leave the temp file behind on failure
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    # Python 2.6 ElementTree does not support attribute predicates in
    # find(); scan the candidates manually instead
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path using ns_map into ElementTree's
    '{namespace}tag' qualified-name syntax."""
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching xpath (a string or a list of
    alternatives tried in order).

    Returns `default` if supplied and nothing matched; raises
    ExtractorError when `fatal` is set and there is no default; returns
    None otherwise.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Several alternatives given: first match wins
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element() but returns the matched element's .text,
    applying the same default/fatal contract when the text is missing."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the first element matching xpath[@key].

    Falls back to `default` when supplied; raises ExtractorError when
    `fatal` is set and no default is available; returns None otherwise.
    """
    el = find_xpath_attr(node, xpath, key)
    if el is not None:
        return el.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = '%s[@%s]' % (xpath, key)
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None


def get_element_by_id(id, html):
    """Return the inner content of the tag whose id attribute equals `id`."""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the inner content of the first tag carrying the given class,
    or None when no such tag exists."""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the inner content of the first tag with the given
    attribute/value pair, or None when there is no match."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None


def get_elements_by_class(class_name, html):
    """Return the inner contents of all tags carrying the given class."""
    # \b anchors keep e.g. "foo" from matching class="foobar"
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the inner contents of all tags carrying the given
    attribute/value pair, entity-decoded, as a list."""
    if escape_value:
        value = re.escape(value)

    extracted = []
    for match in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = match.group('content')

        # Drop quotes the lax value pattern may have swallowed
        if content.startswith(('"', "'")):
            content = content[1:-1]

        extracted.append(unescapeHTML(content))

    return extracted


class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser that records the attributes of the first
    element fed into it."""
    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.attrs = {}

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
     a="foo" B="bar" c="&98;az" d=boz
     empty= noval entity="&"
     sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # Older Pythons raise on malformed HTML; keep whatever was parsed
        pass
    return parser.attrs


def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Translate explicit line-break markup into newlines
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Drop any remaining tags, then decode entities
    html = re.sub('<.*?>', '', html)
    return unescapeHTML(html).strip()
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # Permission problems will not be fixed by renaming
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, retry once with win32-forbidden chars removed
        fallback = sanitize_path(filename)
        if fallback == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(fallback), open_mode), fallback)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    return None if parsed is None else email.utils.mktime_tz(parsed)


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def _clean_char(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return char

    # Keep timestamps readable: 12:34:56 -> 12_34_56 before ':' is mangled
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(_clean_char(c) for c in s)
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result


def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)
    # Replace characters Windows forbids, plus trailing dots/spaces
    cleaned = [
        part if part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part)
        for part in parts]
    if drive_or_unc:
        cleaned.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*cleaned)


def sanitize_url(url):
    """Normalize a URL: give protocol-relative URLs an http: scheme and
    repair a couple of frequently seen scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for broken, fixed in typo_fixes:
        if re.match(broken, url):
            return re.sub(broken, fixed, url)
    return url
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after running the URL through sanitize_url()."""
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """Remove duplicates, keeping first-seen order.

    Membership is tested with == against the result list, so unhashable
    items are supported (at O(n^2) cost).
    """
    deduped = []
    for item in iterable:
        if item not in deduped:
            deduped.append(item)
    return deduped


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        digits = numeric.group(1)
        if digits.startswith('x'):
            base = 16
            digits = '0%s' % digits
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(digits, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    """Decode the HTML entities contained in s."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)


def get_subprocess_encoding():
    """Return the encoding used when exchanging data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename(): decode a byte filename on Python 2;
    pass anything else through unchanged."""
    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        # Already a unicode string
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')


def encodeArgument(s):
    """Encode a command-line argument for handing to a subprocess."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeArgument(b):
    """Decode a command-line argument received from a subprocess."""
    return decodeFilename(b, True)


def decodeOption(optval):
    """Decode a command-line option value to a unicode string."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


def formatSeconds(secs):
    """Format a duration in seconds as '[h:]m[m]:ss'-style text.

    Fix: the previous '>' comparisons made exactly 3600 s render as
    '60:00' and exactly 60 s as '60'; '>=' yields the expected
    '1:00:00' and '1:00' at the unit boundaries.
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs


def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler honouring the 'nocheckcertificate'
    option, with fallbacks for the SSL APIs of older Pythons."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname must be disabled before dropping verify_mode
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message():
    """Standard trailer appended to messages for probable-bug errors."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    msg = '; please report this issue on https://yt-dl.org/bug .'
    msg += ' Make sure you are using the latest version; %s.' % update_cmd
    msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
    return msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    pass


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # Network failures are never youtube-dl bugs, so treat them as expected
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
class UnsupportedError(ExtractorError):
    """Raised for URLs that no extractor knows how to handle."""

    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Raised when a mandatory regular expression failed to match."""
    pass


class GeoRestrictedError(ExtractorError):
    """Raised when a video is unavailable from the user's geographic
    location because of restrictions imposed by the website.

    `countries`, when given, lists the countries the video is playable from.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Raised by FileDownloader objects that are not configured to continue
    on errors; carries the appropriate error message."""

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info


class SameFileError(YoutubeDLError):
    """Raised by FileDownloader objects when multiple files would have to
    be downloaded to the same path on disk."""
    pass


class PostProcessingError(YoutubeDLError):
    """Raised by a PostProcessor's .run() method to signal that the
    postprocessing task failed."""

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg


class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass


class UnavailableVideoError(YoutubeDLError):
    """Raised when a video is requested in a format that is not available
    for that video."""
    pass


class ContentTooShortError(YoutubeDLError):
    """Raised by FileDownloader objects when a downloaded file is smaller
    than the server announced, indicating an interrupted connection.

    Both byte counts are kept on the instance for inspection.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
class XAttrMetadataError(YoutubeDLError):
    """Error while writing extended file attributes; self.reason classifies
    the failure (NO_SPACE / VALUE_TOO_LONG / NOT_SUPPORTED)."""
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    """Raised when no usable xattr implementation is available."""
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class, optionally binding it to the configured
    'source_address' so outgoing connections use that local IP."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Keep only addresses of the same family as the source address
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate in turn; first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc


def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header, also
    dropping any Accept-Encoding header when the marker is present."""
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        # Route through a SOCKS proxy when the internal marker header is set
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        # Try a raw deflate stream first, then a zlib-wrapped one
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry decompression with up to 1023 trailing bytes removed
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from base_class whose sockets tunnel
    through the SOCKS proxy described by the socks_proxy URL."""
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the proxied socket in TLS after connecting
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler allowing a custom connection class and SOCKS proxying."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        # Forward the handler's SSL settings when they exist; which
        # attributes are present depends on the running Python version.
        open_kwargs = {}
        for attr, kwarg in (('_context', 'context'),
                            ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                open_kwargs[kwarg] = getattr(self, attr)

        connection_class = self._https_conn_class
        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            # Tunnel through a SOCKS proxy by swapping in a proxy-aware
            # connection class; strip the internal header so it never
            # reaches the remote server.
            connection_class = make_socks_conn_class(connection_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, connection_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
2750 2751 ''' 2752 _CookieFileEntry = collections.namedtuple( 2753 'CookieFileEntry', 2754 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) 2755 2756 def save(self, filename=None, ignore_discard=False, ignore_expires=False): 2757 """ 2758 Save cookies to a file. 2759 2760 Most of the code is taken from CPython 3.8 and slightly adapted 2761 to support cookie files with UTF-8 in both python 2 and 3. 2762 """ 2763 if filename is None: 2764 if self.filename is not None: 2765 filename = self.filename 2766 else: 2767 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) 2768 2769 # Store session cookies with `expires` set to 0 instead of an empty 2770 # string 2771 for cookie in self: 2772 if cookie.expires is None: 2773 cookie.expires = 0 2774 2775 with io.open(filename, 'w', encoding='utf-8') as f: 2776 f.write(self._HEADER) 2777 now = time.time() 2778 for cookie in self: 2779 if not ignore_discard and cookie.discard: 2780 continue 2781 if not ignore_expires and cookie.is_expired(now): 2782 continue 2783 if cookie.secure: 2784 secure = 'TRUE' 2785 else: 2786 secure = 'FALSE' 2787 if cookie.domain.startswith('.'): 2788 initial_dot = 'TRUE' 2789 else: 2790 initial_dot = 'FALSE' 2791 if cookie.expires is not None: 2792 expires = compat_str(cookie.expires) 2793 else: 2794 expires = '' 2795 if cookie.value is None: 2796 # cookies.txt regards 'Set-Cookie: foo' as a cookie 2797 # with no name, whereas http.cookiejar regards it as a 2798 # cookie with no value. 
2799 name = '' 2800 value = cookie.name 2801 else: 2802 name = cookie.name 2803 value = cookie.value 2804 f.write( 2805 '\t'.join([cookie.domain, initial_dot, cookie.path, 2806 secure, expires, name, value]) + '\n') 2807 2808 def load(self, filename=None, ignore_discard=False, ignore_expires=False): 2809 """Load cookies from a file.""" 2810 if filename is None: 2811 if self.filename is not None: 2812 filename = self.filename 2813 else: 2814 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) 2815 2816 def prepare_line(line): 2817 if line.startswith(self._HTTPONLY_PREFIX): 2818 line = line[len(self._HTTPONLY_PREFIX):] 2819 # comments and empty lines are fine 2820 if line.startswith('#') or not line.strip(): 2821 return line 2822 cookie_list = line.split('\t') 2823 if len(cookie_list) != self._ENTRY_LEN: 2824 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list)) 2825 cookie = self._CookieFileEntry(*cookie_list) 2826 if cookie.expires_at and not cookie.expires_at.isdigit(): 2827 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) 2828 return line 2829 2830 cf = io.StringIO() 2831 with io.open(filename, encoding='utf-8') as f: 2832 for line in f: 2833 try: 2834 cf.write(prepare_line(line)) 2835 except compat_cookiejar.LoadError as e: 2836 write_string( 2837 'WARNING: skipping cookie file entry due to %s: %r\n' 2838 % (e, line), sys.stderr) 2839 continue 2840 cf.seek(0) 2841 self._really_load(cf, filename, ignore_discard, ignore_expires) 2842 # Session cookies are denoted by either `expires` field set to 2843 # an empty string or 0. MozillaCookieJar only recognizes the former 2844 # (see [1]). So we need force the latter to be recognized as session 2845 # cookies on our own. 2846 # Session cookies may be important for cookies-based authentication, 2847 # e.g. 
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies cookie handling to HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on the next HTTP request in row if there are
        # non-ASCII characters in the Set-Cookie HTTP header of the last
        # response (https://github.com/ytdl-org/youtube-dl/issues/6769).
        # A workaround that percent-encoded the Set-Cookie/Set-Cookie2
        # headers before HTTPCookieProcessor processed them used to live
        # here; it is currently disabled.
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2886 2887 This redirect handler solves two issues: 2888 - ensures redirect URL is always unicode under python 2 2889 - introduces support for experimental HTTP response status code 2890 308 Permanent Redirect [2] used by some sites [3] 2891 2892 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py 2893 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308 2894 3. https://github.com/ytdl-org/youtube-dl/issues/28768 2895 """ 2896 2897 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302 2898 2899 def redirect_request(self, req, fp, code, msg, headers, newurl): 2900 """Return a Request or None in response to a redirect. 2901 2902 This is called by the http_error_30x methods when a 2903 redirection response is received. If a redirection should 2904 take place, return a new Request to allow http_error_30x to 2905 perform the redirect. Otherwise, raise HTTPError if no-one 2906 else should try to handle this url. Return None if you can't 2907 but another Handler might. 2908 """ 2909 m = req.get_method() 2910 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD") 2911 or code in (301, 302, 303) and m == "POST")): 2912 raise compat_HTTPError(req.full_url, code, msg, headers, fp) 2913 # Strictly (according to RFC 2616), 301 or 302 in response to 2914 # a POST MUST NOT cause a redirection without confirmation 2915 # from the user (of urllib.request, in this case). In practice, 2916 # essentially all clients do redirect in this case, so we do 2917 # the same. 2918 2919 # On python 2 urlh.geturl() may sometimes return redirect URL 2920 # as byte string instead of unicode. This workaround allows 2921 # to force it always return unicode. 2922 if sys.version_info[0] < 3: 2923 newurl = compat_str(newurl) 2924 2925 # Be conciliant with URIs containing a space. 
def extract_timezone(date_str):
    """Split a trailing UTC-offset designator off *date_str*.

    Returns a ``(timezone, date_str)`` pair: ``timezone`` is a
    ``datetime.timedelta`` for a recognized '+HH:MM' / '-HHMM' suffix
    (zero for a 'Z' suffix or when no designator is found), and
    ``date_str`` has the designator stripped off.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        return datetime.timedelta(), date_str
    # Drop the matched designator from the date string.
    date_str = date_str[:-len(m.group('tz'))]
    sign_char = m.group('sign')
    if not sign_char:
        # A bare 'Z' suffix means UTC, i.e. a zero offset.
        return datetime.timedelta(), date_str
    direction = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension of *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    # Text after the last dot of the path part (query string stripped).
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    # where the extension is followed by a trailing slash.
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
class DateRange(object):
    """A closed interval between two calendar dates."""

    def __init__(self, start=None, end=None):
        """*start* and *end* are strings in any format accepted by date_from_str;
        omitting either extends the range to the corresponding extreme date."""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
out.""" 3141 # Adapted from http://stackoverflow.com/a/3259271/35070 3142 3143 import ctypes 3144 import ctypes.wintypes 3145 3146 WIN_OUTPUT_IDS = { 3147 1: -11, 3148 2: -12, 3149 } 3150 3151 try: 3152 fileno = out.fileno() 3153 except AttributeError: 3154 # If the output stream doesn't have a fileno, it's virtual 3155 return False 3156 except io.UnsupportedOperation: 3157 # Some strange Windows pseudo files? 3158 return False 3159 if fileno not in WIN_OUTPUT_IDS: 3160 return False 3161 3162 GetStdHandle = compat_ctypes_WINFUNCTYPE( 3163 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)( 3164 ('GetStdHandle', ctypes.windll.kernel32)) 3165 h = GetStdHandle(WIN_OUTPUT_IDS[fileno]) 3166 3167 WriteConsoleW = compat_ctypes_WINFUNCTYPE( 3168 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR, 3169 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD), 3170 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32)) 3171 written = ctypes.wintypes.DWORD(0) 3172 3173 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32)) 3174 FILE_TYPE_CHAR = 0x0002 3175 FILE_TYPE_REMOTE = 0x8000 3176 GetConsoleMode = compat_ctypes_WINFUNCTYPE( 3177 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, 3178 ctypes.POINTER(ctypes.wintypes.DWORD))( 3179 ('GetConsoleMode', ctypes.windll.kernel32)) 3180 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value 3181 3182 def not_a_console(handle): 3183 if handle == INVALID_HANDLE_VALUE or handle is None: 3184 return True 3185 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR 3186 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) 3187 3188 if not_a_console(h): 3189 return False 3190 3191 def next_nonbmp_pos(s): 3192 try: 3193 return next(i for i, c in enumerate(s) if ord(c) > 0xffff) 3194 except StopIteration: 3195 return len(s) 3196 3197 while s: 3198 count = min(next_nonbmp_pos(s), 1024) 3199 3200 ret = 
def bytes_to_intlist(bs):
    """Return the integer byte values of *bs* as a list."""
    if not bs:
        return []
    if isinstance(bs[0], int):
        # Python 3: indexing a bytes object already yields ints.
        return list(bs)
    # Python 2: indexing a byte string yields 1-char strings; use ord().
    return [ord(ch) for ch in bs]
class locked_file(object):
    """File wrapper that holds an OS-level advisory lock for its lifetime.

    Intended for use as a context manager: the lock is acquired on
    __enter__ (shared for 'r', exclusive for 'a'/'w') via the
    platform-specific _lock_file helper, and released — with the file
    closed — on __exit__.
    """

    def __init__(self, filename, mode, encoding=None):
        # Only plain read/append/write modes are supported.
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Locking failed: close the handle instead of leaking it.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            # Close even if unlocking raised.
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def format_bytes(bytes):
    """Render a byte count as a human-readable string, e.g. '1.00KiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # Pick the largest binary unit not exceeding the count.
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    scaled = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (scaled, suffix)
def parse_resolution(s):
    """Extract video dimensions from strings like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    # Explicit WIDTHxHEIGHT, also with the Unicode multiplication sign.
    m = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if m:
        return {
            'width': int(m.group('w')),
            'height': int(m.group('h')),
        }

    # Vertical-line count such as 1080p / 576i.
    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    # '4k' / '8k' shorthand: 4k corresponds to 2160 lines (4 * 540).
    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        return {'height': int(m.group(1)) * 540}

    return {}
def fix_xml_ampersands(xml_str):
    """Escape stray '&' characters in *xml_str* as '&amp;'.

    Ampersands that already begin a recognized entity reference (named
    entities amp/lt/gt/apos/quot or numeric character references) are
    left untouched so valid escapes are not double-escaped.
    """
    # BUG FIX: the replacement string was '&', which made this function a
    # no-op (replacing '&' with '&'); the intended replacement is '&amp;'.
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to an int scaled by invscale/scale, or return *default*.

    When *get_attr* is given, the named attribute of *v* is converted
    instead of *v* itself. Empty strings, None and values that fail
    integer conversion all yield *default*.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '' or v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to a float scaled by invscale/scale, or return *default*
    when *v* is None or cannot be converted."""
    if v is None:
        return default
    try:
        return invscale * float(v) / scale
    except (ValueError, TypeError):
        return default
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension: 'a.mp4' -> 'a.<ext>.mp4'.

    When *expected_real_ext* is given and does not match the actual
    extension, *ext* is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)


def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    When *expected_real_ext* is given and does not match the actual
    extension, *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)


def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe


def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        stdout, _ = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    if isinstance(stdout, bytes):  # Python 2.x
        stdout = stdout.decode('ascii', 'ignore')
    return detect_exe_version(stdout, version_re, unrecognized)
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from an executable's *output* using
    *version_re*; return *unrecognized* when nothing matches."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    return mobj.group(1) if mobj else unrecognized


class PagedList(object):
    """Base class for lazily paged result lists."""

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())


class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via *pagefunc*, optionally
    caching fetched pages."""

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        results = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = self._cache.get(pagenum) if self._use_cache else None
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
                if self._use_cache:
                    self._cache[pagenum] = page_results

            # Offsets of the requested slice within the current page.
            startv = start % self._pagesize if firstid <= start < nextfirstid else 0
            endv = (((end - 1) % self._pagesize) + 1
                    if end is not None and firstid <= end <= nextfirstid
                    else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            results.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return results


class InAdvancePagedList(PagedList):
    """PagedList whose total page count is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        results = []
        start_page = start // self._pagesize
        end_page = self._pagecount if end is None else (end // self._pagesize + 1)
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    results.extend(page[:only_more])
                    break
            results.extend(page)
        return results


def uppercase_escape(s):
    """Expand \\UXXXXXXXX escape sequences in *s* into real characters."""
    decoder = codecs.getdecoder('unicode_escape')

    def _expand(mobj):
        return decoder(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)


def lowercase_escape(s):
    """Expand \\uXXXX escape sequences in *s* into real characters."""
    decoder = codecs.getdecoder('unicode_escape')

    def _expand(mobj):
        return decoder(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)


def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 quote() needs a byte string.
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    # Host goes through IDNA (punycode); all other components are
    # percent-escaped individually, then the URL is reassembled.
    return url_parsed._replace(
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()


def read_batch_urls(batch_fd):
    """Read URLs from the file-like object *batch_fd*, one per line.

    Decodes bytes lines as UTF-8, strips a leading BOM, trims whitespace
    and drops empty lines and comment lines starting with '#', ';' or ']'.
    Returns a list of URL strings.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # UTF-8 BOM bytes seen as individual characters (Python 2 artifact).
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]


def urlencode_postdata(*args, **kargs):
    """URL-encode form data and return it as ASCII bytes suitable for POST."""
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')


def update_url_query(url, query):
    """Return *url* with the parameters from the dict *query* merged into
    (and overriding) its existing query string."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))


def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new urllib Request based on *req* with the given overrides.

    The HTTP verb (GET/HEAD/PUT) of the original request is preserved by
    choosing the matching Request subclass.  *headers* are merged over the
    original headers; *query* is merged into the URL's query string.
    NOTE: the mutable default arguments are never mutated here, only read.
    """
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
def _multipart_encode_impl(data, boundary):
    # Build the multipart/form-data body for *data* with a fixed *boundary*.
    # Raises ValueError if the boundary happens to occur inside a field,
    # which would corrupt the encoding (multipart_encode retries then).
    content_type = 'multipart/form-data; boundary=%s' % boundary

    out = b''
    for k, v in data.items():
        out += b'--' + boundary.encode('ascii') + b'\r\n'
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary.encode('ascii') in content:
            raise ValueError('Boundary overlaps with data')
        out += content

    # Closing delimiter.
    out += b'--' + boundary.encode('ascii') + b'--\r\n'

    return out, content_type


def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    # Retry with a fresh random boundary whenever the generated one
    # collides with the payload; a caller-specified boundary is not retried.
    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            out, content_type = _multipart_encode_impl(data, boundary)
            break
        except ValueError:
            if has_specified_boundary:
                raise
            boundary = None

    return out, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Return the first usable value of *key_or_keys* from dict *d*.

    A single key behaves like d.get(); for a list/tuple of keys the first
    key whose value is not None (and, with skip_false_values, not falsy)
    wins.  Falls back to *default*.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d or d[key] is None:
            continue
        if skip_false_values and not d[key]:
            continue
        return d[key]
    return default


def try_get(src, getter, expected_type=None):
    """Apply one or more *getter* callables to *src*, returning the first
    result that does not raise and (if given) matches *expected_type*."""
    if not isinstance(getter, (list, tuple)):
        getter = [getter]
    for get in getter:
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            pass
        else:
            if expected_type is None or isinstance(v, expected_type):
                return v


def merge_dicts(*dicts):
    """Merge *dicts* left-to-right; earlier non-None values win, except that
    an empty string is replaced by a later non-empty string."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            is_better_str = (isinstance(v, compat_str) and v
                             and isinstance(merged.get(k), compat_str)
                             and not merged[k])
            if k not in merged or is_better_str:
                merged[k] = v
    return merged


def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as text, decoding byte strings with *encoding*."""
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)


# MPAA-style movie ratings mapped to minimum ages.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV parental guideline ratings mapped to minimum ages.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}


def parse_age_limit(s):
    """Normalize an age limit ('18', '18+', 'PG-13', 'TV-MA', int) to an int
    age in [0, 21], or None when unrecognized."""
    # Deliberately type() and not isinstance() so that bools are rejected.
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    mobj = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
def strip_jsonp(code):
    """Strip a JSONP wrapper ('callback({...});') down to the bare JSON."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)


def js_to_json(code):
    """Convert a JavaScript-ish object literal into valid JSON text.

    Handles single quotes, unquoted keys, comments, trailing commas,
    hex/octal integers and '!'-coerced booleans.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        token = m.group(0)
        # JSON keywords pass through untouched.
        if token in ('true', 'false', 'null'):
            return token
        # Comments, '!' runs and stray commas are dropped.
        if token.startswith('/*') or token.startswith('//') or token.startswith('!') or token == ',':
            return ""

        if token[0] in ("'", '"'):
            # Re-quote string literals with double quotes, fixing escapes.
            token = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), token[1:-1])
        else:
            # Unquoted tokens: hex/octal numbers become decimal...
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, token)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if token.endswith(':') else '%d' % i

        # ...everything else (bare identifiers/keys) gets double-quoted.
        return '"%s"' % token

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)


def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q
# Default output filename template.
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'


def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES if len(s) > length else s


def version_tuple(v):
    """Turn a version string like '2020.01.01-2' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))


def is_outdated_version(version, limit, assume_new=True):
    """Return True when *version* is older than *limit*; unparseable or
    missing versions are treated as new/old per *assume_new*."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new


def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')


def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(compat_shlex_quote(a) for a in args)


def error_to_compat_str(err):
    """Return str(err) as text."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str


def mimetype2ext(mt):
    """Map a MIME type to a conventional file extension."""
    if mt is None:
        return None

    # A couple of full-type special cases first.
    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mt)
    if ext is not None:
        return ext

    # Otherwise decide by the (lowercased) subtype, dropping parameters.
    _, _, subtype = mt.rpartition('/')
    subtype = subtype.split(';')[0].strip().lower()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(subtype, subtype)
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}.

    Unknown single codecs produce a warning; a pair of unrecognized codecs
    is assumed to be (video, audio) in that order.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',')
                    if c.strip()]
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}


def urlhandle_detect_ext(url_handle):
    """Guess a file extension for a response: first from the
    Content-Disposition filename, then from the Content-Type."""
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Wrap *data* (bytes) into a base64 'data:' URI with *mime_type*."""
    return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))


def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit


def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Honor a BOM when decoding; otherwise assume UTF-8.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)


def determine_protocol(info_dict):
    """Derive the download protocol for *info_dict* from its explicit
    'protocol' field, URL scheme or file extension."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    if url.startswith('mms'):
        return 'mms'
    if url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    if ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme


def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    # Left-align every column but the last to its widest cell plus one space.
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
def _match_one(filter_part, dct):
    """Evaluate a single filter expression against dict *dct*.

    Supports binary comparisons ('duration > 60', "uploader = 'foo'",
    optionally None-tolerant with '?') and unary presence tests
    ('is_live', '!is_live').  Raises ValueError on malformed filters.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    mobj = operator_rex.search(filter_part)
    if mobj:
        op = COMPARISON_OPERATORS[mobj.group('op')]
        actual_value = dct.get(mobj.group('key'))
        if (mobj.group('quotedstrval') is not None
                or mobj.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and mobj.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if mobj.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % mobj.group('op'))
            comparison_value = mobj.group('quotedstrval') or mobj.group('strval') or mobj.group('intval')
            quote = mobj.group('quote')
            if quote is not None:
                # Unescape the quote character inside quoted values.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(mobj.group('intval'))
            except ValueError:
                # Not a plain integer: try parsing as a filesize ('500KiB').
                comparison_value = parse_filesize(mobj.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(mobj.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            mobj.group('intval'), filter_part))
        if actual_value is None:
            return mobj.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    unary_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    mobj = unary_rex.search(filter_part)
    if mobj:
        unary_op = UNARY_OPERATORS[mobj.group('op')]
        return unary_op(dct.get(mobj.group('key')))

    raise ValueError('Invalid filter part %r' % filter_part)


def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    return all(
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))


def match_filter_func(filter_str):
    """Return a --match-filter callback: None when the video passes
    *filter_str*, otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        else:
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    # Parse a TTML time expression ('12.3s' or 'HH:MM:SS[.mmm]') into
    # seconds; returns None for empty or unrecognized input.
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        # 'HH:MM:SS:fff' frames-style separators are treated as fractions.
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))


def srt_subtitles_timecode(seconds):
    # Format seconds as an SRT timecode 'HH:MM:SS,mmm'.
    return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)


def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Old TTML namespace URIs that are rewritten to their modern equivalents
    # before parsing.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML style properties that are translated into SRT markup.
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # SAX-style target that converts one <p> element into SRT text with
        # <b>/<i>/<u>/<font> markup.
        # NOTE(review): these are mutable class-level attributes; _out is
        # rebound per instance via +=, but the lists are shared state that
        # relies on balanced push/pop per parse — confirm before touching.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Effective style = default style, overridden by the element's
                # referenced style, overridden by inline tts:* attributes.
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an ancestor.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the markup opened by the matching start() in reverse.
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Convert one element subtree to SRT-flavored text.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Normalize legacy namespaces at the byte level before XML parsing.
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> inheritance; repeat until all parent references are
    # resolvable (handles forward references between styles).
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style on <body> or <div> becomes the document default style.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # Emit one SRT cue per <p> that has usable timing.
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)


def cli_option(params, command_option, param):
    # Emit [command_option, value] when params[param] is truthy, else [].
    param = params.get(param)
    if param:
        param = compat_str(param)
    return [command_option, param] if param is not None else []


def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    # Emit a boolean CLI option, either as two arguments or joined with
    # *separator*; [] when the parameter is unset.
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    if separator:
        return [command_option + separator + (true_value if param else false_value)]
    return [command_option, true_value if param else false_value]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit [command_option] when params[param] equals *expected_value*."""
    return [command_option] if params.get(param) == expected_value else []


def cli_configuration_args(params, param, default=[]):
    """Return the list of extra CLI args stored under *param*, or *default*."""
    ex_args = params.get(param)
    if ex_args is None:
        return default
    assert isinstance(ex_args, list)
    return ex_args


class ISO639Utils(object):
    """Conversions between ISO 639-1 and ISO 639-2/T language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        return next(
            (short for short, long_name in cls._lang_map.items()
             if long_name == code),
            None)
'Croatia', 4926 'CU': 'Cuba', 4927 'CW': 'Curaçao', 4928 'CY': 'Cyprus', 4929 'CZ': 'Czech Republic', 4930 'DK': 'Denmark', 4931 'DJ': 'Djibouti', 4932 'DM': 'Dominica', 4933 'DO': 'Dominican Republic', 4934 'EC': 'Ecuador', 4935 'EG': 'Egypt', 4936 'SV': 'El Salvador', 4937 'GQ': 'Equatorial Guinea', 4938 'ER': 'Eritrea', 4939 'EE': 'Estonia', 4940 'ET': 'Ethiopia', 4941 'FK': 'Falkland Islands (Malvinas)', 4942 'FO': 'Faroe Islands', 4943 'FJ': 'Fiji', 4944 'FI': 'Finland', 4945 'FR': 'France', 4946 'GF': 'French Guiana', 4947 'PF': 'French Polynesia', 4948 'TF': 'French Southern Territories', 4949 'GA': 'Gabon', 4950 'GM': 'Gambia', 4951 'GE': 'Georgia', 4952 'DE': 'Germany', 4953 'GH': 'Ghana', 4954 'GI': 'Gibraltar', 4955 'GR': 'Greece', 4956 'GL': 'Greenland', 4957 'GD': 'Grenada', 4958 'GP': 'Guadeloupe', 4959 'GU': 'Guam', 4960 'GT': 'Guatemala', 4961 'GG': 'Guernsey', 4962 'GN': 'Guinea', 4963 'GW': 'Guinea-Bissau', 4964 'GY': 'Guyana', 4965 'HT': 'Haiti', 4966 'HM': 'Heard Island and McDonald Islands', 4967 'VA': 'Holy See (Vatican City State)', 4968 'HN': 'Honduras', 4969 'HK': 'Hong Kong', 4970 'HU': 'Hungary', 4971 'IS': 'Iceland', 4972 'IN': 'India', 4973 'ID': 'Indonesia', 4974 'IR': 'Iran, Islamic Republic of', 4975 'IQ': 'Iraq', 4976 'IE': 'Ireland', 4977 'IM': 'Isle of Man', 4978 'IL': 'Israel', 4979 'IT': 'Italy', 4980 'JM': 'Jamaica', 4981 'JP': 'Japan', 4982 'JE': 'Jersey', 4983 'JO': 'Jordan', 4984 'KZ': 'Kazakhstan', 4985 'KE': 'Kenya', 4986 'KI': 'Kiribati', 4987 'KP': 'Korea, Democratic People\'s Republic of', 4988 'KR': 'Korea, Republic of', 4989 'KW': 'Kuwait', 4990 'KG': 'Kyrgyzstan', 4991 'LA': 'Lao People\'s Democratic Republic', 4992 'LV': 'Latvia', 4993 'LB': 'Lebanon', 4994 'LS': 'Lesotho', 4995 'LR': 'Liberia', 4996 'LY': 'Libya', 4997 'LI': 'Liechtenstein', 4998 'LT': 'Lithuania', 4999 'LU': 'Luxembourg', 5000 'MO': 'Macao', 5001 'MK': 'Macedonia, the Former Yugoslav Republic of', 5002 'MG': 'Madagascar', 5003 'MW': 'Malawi', 5004 
        # _country_map continued: Malaysia .. Spain
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        # _country_map continued: Sri Lanka .. Zimbabwe
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Lookup is case-insensitive; returns None for unknown codes.
        return cls._country_map.get(code.upper())


class GeoUtils(object):
    # Major IPv4 address blocks per country
    # (ISO 3166-1 alpha-2 code -> a representative CIDR block; also contains
    # a few non-country registry codes such as 'AP' and 'EU')
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
'NI': '186.76.0.0/15', 5291 'NL': '145.96.0.0/11', 5292 'NO': '84.208.0.0/13', 5293 'NP': '36.252.0.0/15', 5294 'NR': '203.98.224.0/19', 5295 'NU': '49.156.48.0/22', 5296 'NZ': '49.224.0.0/14', 5297 'OM': '5.36.0.0/15', 5298 'PA': '186.72.0.0/15', 5299 'PE': '186.160.0.0/14', 5300 'PF': '123.50.64.0/18', 5301 'PG': '124.240.192.0/19', 5302 'PH': '49.144.0.0/13', 5303 'PK': '39.32.0.0/11', 5304 'PL': '83.0.0.0/11', 5305 'PM': '70.36.0.0/20', 5306 'PR': '66.50.0.0/16', 5307 'PS': '188.161.0.0/16', 5308 'PT': '85.240.0.0/13', 5309 'PW': '202.124.224.0/20', 5310 'PY': '181.120.0.0/14', 5311 'QA': '37.210.0.0/15', 5312 'RE': '102.35.0.0/16', 5313 'RO': '79.112.0.0/13', 5314 'RS': '93.86.0.0/15', 5315 'RU': '5.136.0.0/13', 5316 'RW': '41.186.0.0/16', 5317 'SA': '188.48.0.0/13', 5318 'SB': '202.1.160.0/19', 5319 'SC': '154.192.0.0/11', 5320 'SD': '102.120.0.0/13', 5321 'SE': '78.64.0.0/12', 5322 'SG': '8.128.0.0/10', 5323 'SI': '188.196.0.0/14', 5324 'SK': '78.98.0.0/15', 5325 'SL': '102.143.0.0/17', 5326 'SM': '89.186.32.0/19', 5327 'SN': '41.82.0.0/15', 5328 'SO': '154.115.192.0/18', 5329 'SR': '186.179.128.0/17', 5330 'SS': '105.235.208.0/21', 5331 'ST': '197.159.160.0/19', 5332 'SV': '168.243.0.0/16', 5333 'SX': '190.102.0.0/20', 5334 'SY': '5.0.0.0/16', 5335 'SZ': '41.84.224.0/19', 5336 'TC': '65.255.48.0/20', 5337 'TD': '154.68.128.0/19', 5338 'TG': '196.168.0.0/14', 5339 'TH': '171.96.0.0/13', 5340 'TJ': '85.9.128.0/18', 5341 'TK': '27.96.24.0/21', 5342 'TL': '180.189.160.0/20', 5343 'TM': '95.85.96.0/19', 5344 'TN': '197.0.0.0/11', 5345 'TO': '175.176.144.0/21', 5346 'TR': '78.160.0.0/11', 5347 'TT': '186.44.0.0/15', 5348 'TV': '202.2.96.0/19', 5349 'TW': '120.96.0.0/11', 5350 'TZ': '156.156.0.0/14', 5351 'UA': '37.52.0.0/14', 5352 'UG': '102.80.0.0/13', 5353 'US': '6.0.0.0/8', 5354 'UY': '167.56.0.0/13', 5355 'UZ': '84.54.64.0/18', 5356 'VA': '212.77.0.0/19', 5357 'VC': '207.191.240.0/21', 5358 'VE': '186.88.0.0/13', 5359 'VG': '66.81.192.0/20', 5360 'VI': 
'146.226.0.0/16', 5361 'VN': '14.160.0.0/11', 5362 'VU': '202.80.32.0/20', 5363 'WF': '117.20.32.0/21', 5364 'WS': '202.4.32.0/19', 5365 'YE': '134.35.0.0/16', 5366 'YT': '41.242.116.0/22', 5367 'ZA': '41.0.0.0/11', 5368 'ZM': '102.144.0.0/13', 5369 'ZW': '102.177.192.0/18', 5370 } 5371 5372 @classmethod 5373 def random_ipv4(cls, code_or_block): 5374 if len(code_or_block) == 2: 5375 block = cls._country_ip_map.get(code_or_block.upper()) 5376 if not block: 5377 return None 5378 else: 5379 block = code_or_block 5380 addr, preflen = block.split('/') 5381 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] 5382 addr_max = addr_min | (0xffffffff >> int(preflen)) 5383 return compat_str(socket.inet_ntoa( 5384 compat_struct_pack('!L', random.randint(addr_min, addr_max)))) 5385 5386 5387 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): 5388 def __init__(self, proxies=None): 5389 # Set default handlers 5390 for type in ('http', 'https'): 5391 setattr(self, '%s_open' % type, 5392 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open: 5393 meth(r, proxy, type)) 5394 compat_urllib_request.ProxyHandler.__init__(self, proxies) 5395 5396 def proxy_open(self, req, proxy, type): 5397 req_proxy = req.headers.get('Ytdl-request-proxy') 5398 if req_proxy is not None: 5399 proxy = req_proxy 5400 del req.headers['Ytdl-request-proxy'] 5401 5402 if proxy == '__noproxy__': 5403 return None # No Proxy 5404 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): 5405 req.add_header('Ytdl-socks-proxy', proxy) 5406 # youtube-dl's http/https handlers do wrapping the socket with socks 5407 return None 5408 return compat_urllib_request.ProxyHandler.proxy_open( 5409 self, req, proxy, type) 5410 5411 5412 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is 5413 # released into Public Domain 5414 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 5415 5416 def 
long_to_bytes(n, blocksize=0): 5417 """long_to_bytes(n:long, blocksize:int) : string 5418 Convert a long integer to a byte string. 5419 5420 If optional blocksize is given and greater than zero, pad the front of the 5421 byte string with binary zeros so that the length is a multiple of 5422 blocksize. 5423 """ 5424 # after much testing, this algorithm was deemed to be the fastest 5425 s = b'' 5426 n = int(n) 5427 while n > 0: 5428 s = compat_struct_pack('>I', n & 0xffffffff) + s 5429 n = n >> 32 5430 # strip off leading zeros 5431 for i in range(len(s)): 5432 if s[i] != b'\000'[0]: 5433 break 5434 else: 5435 # only happens when n == 0 5436 s = b'\000' 5437 i = 0 5438 s = s[i:] 5439 # add back some pad bytes. this could be done more efficiently w.r.t. the 5440 # de-padding being done above, but sigh... 5441 if blocksize > 0 and len(s) % blocksize: 5442 s = (blocksize - len(s) % blocksize) * b'\000' + s 5443 return s 5444 5445 5446 def bytes_to_long(s): 5447 """bytes_to_long(string) : long 5448 Convert a byte string to a long integer. 5449 5450 This is (essentially) the inverse of long_to_bytes(). 5451 """ 5452 acc = 0 5453 length = len(s) 5454 if length % 4: 5455 extra = (4 - length % 4) 5456 s = b'\000' * extra + s 5457 length = length + extra 5458 for i in range(0, length, 4): 5459 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0] 5460 return acc 5461 5462 5463 def ohdave_rsa_encrypt(data, exponent, modulus): 5464 ''' 5465 Implement OHDave's RSA algorithm. 
See http://www.ohdave.com/rsa/ 5466 5467 Input: 5468 data: data to encrypt, bytes-like object 5469 exponent, modulus: parameter e and N of RSA algorithm, both integer 5470 Output: hex string of encrypted data 5471 5472 Limitation: supports one block encryption only 5473 ''' 5474 5475 payload = int(binascii.hexlify(data[::-1]), 16) 5476 encrypted = pow(payload, exponent, modulus) 5477 return '%x' % encrypted 5478 5479 5480 def pkcs1pad(data, length): 5481 """ 5482 Padding input data with PKCS#1 scheme 5483 5484 @param {int[]} data input data 5485 @param {int} length target length 5486 @returns {int[]} padded data 5487 """ 5488 if len(data) > length - 11: 5489 raise ValueError('Input data too long for PKCS#1 padding') 5490 5491 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)] 5492 return [0, 2] + pseudo_random + [0] + data 5493 5494 5495 def encode_base_n(num, n, table=None): 5496 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' 5497 if not table: 5498 table = FULL_TABLE[:n] 5499 5500 if n > len(table): 5501 raise ValueError('base %d exceeds table length %d' % (n, len(table))) 5502 5503 if num == 0: 5504 return table[0] 5505 5506 ret = '' 5507 while num: 5508 ret = table[num % n] + ret 5509 num = num // n 5510 return ret 5511 5512 5513 def decode_packed_codes(code): 5514 mobj = re.search(PACKED_CODES_RE, code) 5515 obfuscated_code, base, count, symbols = mobj.groups() 5516 base = int(base) 5517 count = int(count) 5518 symbols = symbols.split('|') 5519 symbol_table = {} 5520 5521 while count: 5522 count -= 1 5523 base_n_count = encode_base_n(count, base) 5524 symbol_table[base_n_count] = symbols[count] or base_n_count 5525 5526 return re.sub( 5527 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], 5528 obfuscated_code) 5529 5530 5531 def caesar(s, alphabet, shift): 5532 if shift == 0: 5533 return s 5534 l = len(alphabet) 5535 return ''.join( 5536 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet 
else c 5537 for c in s) 5538 5539 5540 def rot47(s): 5541 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47) 5542 5543 5544 def parse_m3u8_attributes(attrib): 5545 info = {} 5546 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib): 5547 if val.startswith('"'): 5548 val = val[1:-1] 5549 info[key] = val 5550 return info 5551 5552 5553 def urshift(val, n): 5554 return val >> n if val >= 0 else (val + 0x100000000) >> n 5555 5556 5557 # Based on png2str() written by @gdkchan and improved by @yokrysty 5558 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706 5559 def decode_png(png_data): 5560 # Reference: https://www.w3.org/TR/PNG/ 5561 header = png_data[8:] 5562 5563 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR': 5564 raise IOError('Not a valid PNG file.') 5565 5566 int_map = {1: '>B', 2: '>H', 4: '>I'} 5567 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0] 5568 5569 chunks = [] 5570 5571 while header: 5572 length = unpack_integer(header[:4]) 5573 header = header[4:] 5574 5575 chunk_type = header[:4] 5576 header = header[4:] 5577 5578 chunk_data = header[:length] 5579 header = header[length:] 5580 5581 header = header[4:] # Skip CRC 5582 5583 chunks.append({ 5584 'type': chunk_type, 5585 'length': length, 5586 'data': chunk_data 5587 }) 5588 5589 ihdr = chunks[0]['data'] 5590 5591 width = unpack_integer(ihdr[:4]) 5592 height = unpack_integer(ihdr[4:8]) 5593 5594 idat = b'' 5595 5596 for chunk in chunks: 5597 if chunk['type'] == b'IDAT': 5598 idat += chunk['data'] 5599 5600 if not idat: 5601 raise IOError('Unable to read PNG data.') 5602 5603 decompressed_data = bytearray(zlib.decompress(idat)) 5604 5605 stride = width * 3 5606 pixels = [] 5607 5608 def _get_pixel(idx): 5609 x = idx % stride 5610 y = idx // stride 5611 return pixels[y][x] 5612 5613 for y in range(height): 5614 basePos = y * 
(1 + stride) 5615 filter_type = decompressed_data[basePos] 5616 5617 current_row = [] 5618 5619 pixels.append(current_row) 5620 5621 for x in range(stride): 5622 color = decompressed_data[1 + basePos + x] 5623 basex = y * stride + x 5624 left = 0 5625 up = 0 5626 5627 if x > 2: 5628 left = _get_pixel(basex - 3) 5629 if y > 0: 5630 up = _get_pixel(basex - stride) 5631 5632 if filter_type == 1: # Sub 5633 color = (color + left) & 0xff 5634 elif filter_type == 2: # Up 5635 color = (color + up) & 0xff 5636 elif filter_type == 3: # Average 5637 color = (color + ((left + up) >> 1)) & 0xff 5638 elif filter_type == 4: # Paeth 5639 a = left 5640 b = up 5641 c = 0 5642 5643 if x > 2 and y > 0: 5644 c = _get_pixel(basex - stride - 3) 5645 5646 p = a + b - c 5647 5648 pa = abs(p - a) 5649 pb = abs(p - b) 5650 pc = abs(p - c) 5651 5652 if pa <= pb and pa <= pc: 5653 color = (color + a) & 0xff 5654 elif pb <= pc: 5655 color = (color + b) & 0xff 5656 else: 5657 color = (color + c) & 0xff 5658 5659 current_row.append(color) 5660 5661 return width, height, pixels 5662 5663 5664 def write_xattr(path, key, value): 5665 # This mess below finds the best xattr tool for the job 5666 try: 5667 # try the pyxattr module... 5668 import xattr 5669 5670 if hasattr(xattr, 'set'): # pyxattr 5671 # Unicode arguments are not supported in python-pyxattr until 5672 # version 0.5.0 5673 # See https://github.com/ytdl-org/youtube-dl/issues/5498 5674 pyxattr_required_version = '0.5.0' 5675 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): 5676 # TODO: fallback to CLI tools 5677 raise XAttrUnavailableError( 5678 'python-pyxattr is detected but is too old. ' 5679 'youtube-dl requires %s or above while your version is %s. 
' 5680 'Falling back to other xattr implementations' % ( 5681 pyxattr_required_version, xattr.__version__)) 5682 5683 setxattr = xattr.set 5684 else: # xattr 5685 setxattr = xattr.setxattr 5686 5687 try: 5688 setxattr(path, key, value) 5689 except EnvironmentError as e: 5690 raise XAttrMetadataError(e.errno, e.strerror) 5691 5692 except ImportError: 5693 if compat_os_name == 'nt': 5694 # Write xattrs to NTFS Alternate Data Streams: 5695 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 5696 assert ':' not in key 5697 assert os.path.exists(path) 5698 5699 ads_fn = path + ':' + key 5700 try: 5701 with open(ads_fn, 'wb') as f: 5702 f.write(value) 5703 except EnvironmentError as e: 5704 raise XAttrMetadataError(e.errno, e.strerror) 5705 else: 5706 user_has_setfattr = check_executable('setfattr', ['--version']) 5707 user_has_xattr = check_executable('xattr', ['-h']) 5708 5709 if user_has_setfattr or user_has_xattr: 5710 5711 value = value.decode('utf-8') 5712 if user_has_setfattr: 5713 executable = 'setfattr' 5714 opts = ['-n', key, '-v', value] 5715 elif user_has_xattr: 5716 executable = 'xattr' 5717 opts = ['-w', key, value] 5718 5719 cmd = ([encodeFilename(executable, True)] 5720 + [encodeArgument(o) for o in opts] 5721 + [encodeFilename(path, True)]) 5722 5723 try: 5724 p = subprocess.Popen( 5725 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) 5726 except EnvironmentError as e: 5727 raise XAttrMetadataError(e.errno, e.strerror) 5728 stdout, stderr = p.communicate() 5729 stderr = stderr.decode('utf-8', 'replace') 5730 if p.returncode != 0: 5731 raise XAttrMetadataError(p.returncode, stderr) 5732 5733 else: 5734 # On Unix, and can't find pyxattr, setfattr, or xattr. 5735 if sys.platform.startswith('linux'): 5736 raise XAttrUnavailableError( 5737 "Couldn't find a tool to set the xattrs. 
" 5738 "Install either the python 'pyxattr' or 'xattr' " 5739 "modules, or the GNU 'attr' package " 5740 "(which contains the 'setfattr' tool).") 5741 else: 5742 raise XAttrUnavailableError( 5743 "Couldn't find a tool to set the xattrs. " 5744 "Install either the python 'xattr' module, " 5745 "or the 'xattr' binary.") 5746 5747 5748 def random_birthday(year_field, month_field, day_field): 5749 start_date = datetime.date(1950, 1, 1) 5750 end_date = datetime.date(1995, 12, 31) 5751 offset = random.randint(0, (end_date - start_date).days) 5752 random_date = start_date + datetime.timedelta(offset) 5753 return { 5754 year_field: str(random_date.year), 5755 month_field: str(random_date.month), 5756 day_field: str(random_date.day), 5757 } 5758 5759 5760 def clean_podcast_url(url): 5761 return re.sub(r'''(?x) 5762 (?: 5763 (?: 5764 chtbl\.com/track| 5765 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/ 5766 play\.podtrac\.com 5767 )/[^/]+| 5768 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure 5769 flex\.acast\.com| 5770 pd(?: 5771 cn\.co| # https://podcorn.com/analytics-prefix/ 5772 st\.fm # https://podsights.com/docs/ 5773 )/e 5774 )/''', '', url)