"info_dict":{
"title":"Tired of Link Building? Try BacklinkMyDomain.com!"
}
+ },
+ {
+ "name": "Ina",
+ "url": "www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html",
+ "file": "I12055569.mp4",
+ "md5": "a667021bf2b41f8dc6049479d9bb38a3",
+ "info_dict":{
+ "title":"François Hollande \"Je crois que c'est clair\""
+ }
}
]
'ext': video_extension,
'title': video_title,
}]
+
+class InaIE(InfoExtractor):
+    """Information Extractor for Ina.fr
+
+    Resolves an ina.fr video page URL to its MP4 media URL and title by
+    reading the MRSS notice fetched from player.ina.fr for the video id.
+    """
+    # Dots in the host name are escaped so they only match a literal '.';
+    # an unescaped '.' would also accept hosts such as 'wwwXina.fr'.
+    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
+    IE_NAME = u'Ina'
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dict for ``url``.
+
+        Raises ExtractorError when ``url`` does not match ``_VALID_URL``,
+        or when the media URL or the title cannot be found in the MRSS
+        notice.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group('id')
+        self.to_screen(u'video id : %s' % video_id)
+
+        # The MRSS notice carries both the media URL and the title.
+        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
+        self.to_screen(u'mrss url : %s' % mrss_url)
+        video_extension = 'mp4'
+        webpage = self._download_webpage(mrss_url, video_id)
+        self.report_extraction(video_id)
+
+        # Host dots are escaped here too, so only mp4.ina.fr is accepted.
+        media_re = r'<media:player url="(?P<mp4url>http://mp4\.ina\.fr/[^"]+\.mp4)'
+        mobj = re.search(media_re, webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract media URL')
+        video_url = mobj.group('mp4url')
+
+        # The title is wrapped in a CDATA section inside <title>.
+        title_re = r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>'
+        mobj = re.search(title_re, webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract title')
+        video_title = mobj.group('titre')
+
+        self.to_screen(u'Titre de la video : %s' % video_title)
+
+        return [{
+            'id': video_id,
+            'url': video_url,
+            'ext': video_extension,
+            'title': video_title,
+        }]
def gen_extractors():
""" Return a list of an instance of every supported extractor.
TumblrIE(),
BandcampIE(),
RedTubeIE(),
+ InaIE(),
GenericIE()
]