From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 16:14:29 +0000 (+0100)
Subject: Merge remote-tracking branch 'alab1001101/master'
X-Git-Url: http://git.oshgnacknak.de/?a=commitdiff_plain;h=33d94a6c999ae784be7529aaaea42adadeab0c27;p=youtube-dl

Merge remote-tracking branch 'alab1001101/master'
---

33d94a6c999ae784be7529aaaea42adadeab0c27
diff --cc youtube_dl/InfoExtractors.py
index 13b04ab5b,82459e7a8..cea30dad8
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@@ -1140,6 -1077,161 +1140,143 @@@ class VimeoIE(InfoExtractor)
  		}]
  
  
+ class ArteTvIE(InfoExtractor):
 -        """arte.tv information extractor."""
 -
 -        _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
 -        _LIVE_URL = r'index-[0-9]+\.html$'
 -
 -        IE_NAME = u'arte.tv'
 -
 -        def __init__(self, downloader=None):
 -                InfoExtractor.__init__(self, downloader)
 -
 -        def report_download_webpage(self, video_id):
 -                """Report webpage download."""
 -                self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
 -
 -        def report_extraction(self, video_id):
 -                """Report information extraction."""
 -                self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
 -
 -        def fetch_webpage(self, url):
 -                self._downloader.increment_downloads()
 -                request = urllib2.Request(url)
 -                try:
 -                        self.report_download_webpage(url)
 -                        webpage = urllib2.urlopen(request).read()
 -                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 -                        self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
 -                        return
 -                except ValueError, err:
 -                        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
 -                        return
 -                return webpage
 -
 -        def grep_webpage(self, url, regex, regexFlags, matchTuples):
 -                page = self.fetch_webpage(url)
 -                mobj = re.search(regex, page, regexFlags)
 -                info = {}
 -
 -                if mobj is None:
 -                    self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
 -                    return
 -
 -                for (i, key, err) in matchTuples:
 -                    if mobj.group(i) is None:
 -                        self._downloader.trouble(err)
 -                        return
 -                    else:
 -                        info[key] = mobj.group(i)
 -
 -                return info
 -
 -        def extractLiveStream(self, url):
 -
 -                video_lang = url.split('/')[-4]
 -
 -                info = self.grep_webpage(
 -                    url,
 -                    r'src="(.*?/videothek_js.*?\.js)',
 -                    0,
 -                    [
 -                        (1, 'url', u'ERROR: Invalid URL: %s' % url)
 -                    ]
 -                )
 -
 -                http_host = url.split('/')[2]
 -                next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
 -
 -                info = self.grep_webpage(
 -                    next_url,
 -                    r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
 -                     '(http://.*?\.swf).*?' +
 -                     '(rtmp://.*?)\'',
 -                    re.DOTALL,
 -                    [
 -                        (1, 'path',   u'ERROR: could not extract video path: %s' % url),
 -                        (2, 'player', u'ERROR: could not extract video player: %s' % url),
 -                        (3, 'url',    u'ERROR: could not extract video url: %s' % url)
 -                    ]
 -                )
 -
 -                video_url = u'%s/%s' % (info.get('url'), info.get('path'))
 -
 -                print u'rtmpdump --swfVfy \'%s\' --rtmp \'%s\' --live -o arte-live.mp4' % (info.get('player'), video_url)
 -
 -        def extractPlus7Stream(self, url):
 -
 -                video_lang = url.split('/')[-3]
 -
 -                info = self.grep_webpage(
 -                    url,
 -                    r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
 -                    0,
 -                    [
 -                        (1, 'url', u'ERROR: Invalid URL: %s' % url)
 -                    ]
 -                )
 -
 -                next_url = urllib.unquote(info.get('url'))
 -
 -                info = self.grep_webpage(
 -                    next_url,
 -                    r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
 -                    0,
 -                    [
 -                        (1, 'url', u'ERROR: Could not find <video> tag: %s' % url)
 -                    ]
 -                )
 -
 -                next_url = urllib.unquote(info.get('url'))
 -
 -                info = self.grep_webpage(
 -                    next_url,
 -                    r'<video id="(.*?)".*?>.*?' +
 -                     '<name>(.*?)</name>.*?' +
 -                     '<dateVideo>(.*?)</dateVideo>.*?' +
 -                     '<url quality="hd">(.*?)</url>',
 -                    re.DOTALL,
 -                    [
 -                        (1, 'id',    u'ERROR: could not extract video id: %s' % url),
 -                        (2, 'title', u'ERROR: could not extract video title: %s' % url),
 -                        (3, 'date',  u'ERROR: could not extract video date: %s' % url),
 -                        (4, 'url',   u'ERROR: could not extract video url: %s' % url)
 -                    ]
 -                )
 -
 -                return {
 -                    'id':           info.get('id'),
 -                    'url':          urllib.unquote(info.get('url')),
 -                    'uploader':     u'arte.tv',
 -                    'upload_date':  info.get('date'),
 -                    'title':        info.get('title'),
 -                    'ext':          u'mp4',
 -                    'format':       u'NA',
 -                    'player_url':   None,
 -                }
 -
 -        def _real_extract(self, url):
 -
 -                video_id = url.split('/')[-1]
 -
 -                self.report_extraction(video_id)
 -
 -                if re.search(self._LIVE_URL, video_id) is not None:
 -                    self.extractLiveStream(url)
 -                    return
 -                else:
 -                    info = self.extractPlus7Stream(url)
 -
 -                try:
 -                        # Process video information
 -                        self._downloader.process_info(info)
 -                except UnavailableVideoError, err:
 -                        self._downloader.trouble(u'\nERROR: unable to download video')
++	"""arte.tv information extractor."""
++
++	_VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
++	_LIVE_URL = r'index-[0-9]+\.html$'
++
++	IE_NAME = u'arte.tv'
++
++	def __init__(self, downloader=None):
++		InfoExtractor.__init__(self, downloader)
++
++	def report_download_webpage(self, video_id):
++		"""Report webpage download."""
++		self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
++
++	def report_extraction(self, video_id):
++		"""Report information extraction."""
++		self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
++
++	def fetch_webpage(self, url):
++		self._downloader.increment_downloads()
++		request = urllib2.Request(url)
++		try:
++			self.report_download_webpage(url)
++			webpage = urllib2.urlopen(request).read()
++		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
++			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
++			return
++		except ValueError, err:
++			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
++			return
++		return webpage
++
++	def grep_webpage(self, url, regex, regexFlags, matchTuples):
++		page = self.fetch_webpage(url)
++		mobj = re.search(regex, page, regexFlags)
++		info = {}
++
++		if mobj is None:
++			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
++			return
++
++		for (i, key, err) in matchTuples:
++			if mobj.group(i) is None:
++				self._downloader.trouble(err)
++				return
++			else:
++				info[key] = mobj.group(i)
++
++		return info
++
++	def extractLiveStream(self, url):
++		video_lang = url.split('/')[-4]
++		info = self.grep_webpage(
++			url,
++			r'src="(.*?/videothek_js.*?\.js)',
++			0,
++			[
++				(1, 'url', u'ERROR: Invalid URL: %s' % url)
++			]
++		)
++		http_host = url.split('/')[2]
++		next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
++		info = self.grep_webpage(
++			next_url,
++			r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
++				'(http://.*?\.swf).*?' +
++				'(rtmp://.*?)\'',
++			re.DOTALL,
++			[
++				(1, 'path',   u'ERROR: could not extract video path: %s' % url),
++				(2, 'player', u'ERROR: could not extract video player: %s' % url),
++				(3, 'url',    u'ERROR: could not extract video url: %s' % url)
++			]
++		)
++		video_url = u'%s/%s' % (info.get('url'), info.get('path'))
++
++	def extractPlus7Stream(self, url):
++		video_lang = url.split('/')[-3]
++		info = self.grep_webpage(
++			url,
++			r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
++			0,
++			[
++				(1, 'url', u'ERROR: Invalid URL: %s' % url)
++			]
++		)
++		next_url = urllib.unquote(info.get('url'))
++		info = self.grep_webpage(
++			next_url,
++			r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
++			0,
++			[
++				(1, 'url', u'ERROR: Could not find <video> tag: %s' % url)
++			]
++		)
++		next_url = urllib.unquote(info.get('url'))
++
++		info = self.grep_webpage(
++			next_url,
++			r'<video id="(.*?)".*?>.*?' +
++				'<name>(.*?)</name>.*?' +
++				'<dateVideo>(.*?)</dateVideo>.*?' +
++				'<url quality="hd">(.*?)</url>',
++			re.DOTALL,
++			[
++				(1, 'id',    u'ERROR: could not extract video id: %s' % url),
++				(2, 'title', u'ERROR: could not extract video title: %s' % url),
++				(3, 'date',  u'ERROR: could not extract video date: %s' % url),
++				(4, 'url',   u'ERROR: could not extract video url: %s' % url)
++			]
++		)
++
++		return {
++			'id':           info.get('id'),
++			'url':          urllib.unquote(info.get('url')),
++			'uploader':     u'arte.tv',
++			'upload_date':  info.get('date'),
++			'title':        info.get('title'),
++			'ext':          u'mp4',
++			'format':       u'NA',
++			'player_url':   None,
++		}
++
++	def _real_extract(self, url):
++		video_id = url.split('/')[-1]
++		self.report_extraction(video_id)
++
++		if re.search(self._LIVE_URL, video_id) is not None:
++			self.extractLiveStream(url)
++			return
++		else:
++			info = self.extractPlus7Stream(url)
++
++		return [info]
+ 
+ 
  class GenericIE(InfoExtractor):
  	"""Generic last-resort information extractor."""
  
diff --cc youtube_dl/__init__.py
index 92478aa6b,13cf77896..f7a49e13a
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@@ -364,10 -351,8 +364,10 @@@ def gen_extractors()
  		MixcloudIE(),
  		StanfordOpenClassroomIE(),
  		MTVIE(),
 -                ArteTvIE(),
 -
 +		YoukuIE(),
 +		XNXXIE(),
 +		GooglePlusIE(),
- 
++		ArteTvIE(),
  		GenericIE()
  	]