Fixed 4tube.com extractor to pull metadata from associated JavaScript and not the...

author kitty <magicvidyakitty@gmail.com>

Thu, 8 Oct 2015 13:31:23 +0000 (06:31 -0700)

committer Sergey M․ <dstftw@gmail.com>

Thu, 8 Oct 2015 15:05:16 +0000 (21:05 +0600)
author kitty <magicvidyakitty@gmail.com>
Thu, 8 Oct 2015 13:31:23 +0000 (06:31 -0700)
committer Sergey M․ <dstftw@gmail.com>
Thu, 8 Oct 2015 15:05:16 +0000 (21:05 +0600)
diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py

index 3bb4f6239aceccd86dbb1dd4e9eafeace7ddad88..226ee67f072e3f783015e3fbc737ff08955d3b5a 100644 (file)
--- a/youtube_dl/extractor/fourtube.py
+++ b/youtube_dl/extractor/fourtube.py
@@ -45,11 +45,9 @@ class FourTubeIE(InfoExtractor):
              'uploadDate', webpage))
          thumbnail = self._html_search_meta('thumbnailUrl', webpage)
          uploader_id = self._html_search_regex(
-            r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
-            webpage, 'uploader id')
+            r'<a class="img-avatar" href="[^"]+/users/([^/"]+)" title="Go to [^"]+ page">', webpage, 'uploader id')
          uploader = self._html_search_regex(
-            r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
-            webpage, 'uploader')
+            r'<a class="img-avatar" href="[^"]+/users/[^/"]+" title="Go to ([^"]+) page">', webpage, 'uploader')
  
          categories_html = self._search_regex(
              r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
@@ -68,9 +66,12 @@ class FourTubeIE(InfoExtractor):
              webpage, 'like count', fatal=False))
          duration = parse_duration(self._html_search_meta('duration', webpage))
  
+        player_url = self._search_regex(r'<script id="playerembed" src="([^"]+)">',webpage,'player javascript')
+        player_js = self._download_webpage(player_url,video_id,'Downloading player Javascript')
+
          params_js = self._search_regex(
              r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
-            webpage, 'initialization parameters'
+            player_js, 'initialization parameters'
          )
          params = self._parse_json('[%s]' % params_js, video_id)
          media_id = params[0]
author	kitty <magicvidyakitty@gmail.com>
	Thu, 8 Oct 2015 13:31:23 +0000 (06:31 -0700)
committer	Sergey M․ <dstftw@gmail.com>
	Thu, 8 Oct 2015 15:05:16 +0000 (21:05 +0600)