[youtube] URLs like youtube.com/NASA are now interpreted as users (fixes #1069)

author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Thu, 5 Sep 2013 20:38:23 +0000 (22:38 +0200)

committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Thu, 5 Sep 2013 20:39:15 +0000 (22:39 +0200)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Thu, 5 Sep 2013 20:38:23 +0000 (22:38 +0200)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Thu, 5 Sep 2013 20:39:15 +0000 (22:39 +0200)
diff --git a/test/test_all_urls.py b/test/test_all_urls.py

index c54faa380e44a57969563109d3a7baaf11e835c7..fe4090d18747d3b9da35d1e12c634e1c00e18c5c 100644 (file)
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -11,6 +11,15 @@ from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE,
  from helper import get_testcases
  
  class TestAllURLsMatching(unittest.TestCase):
+    def setUp(self):
+        self.ies = gen_extractors()
+
+    def matching_ies(self, url):
+        return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
+
+    def assertMatch(self, url, ie_list):
+        self.assertEqual(self.matching_ies(url), ie_list)
+
      def test_youtube_playlist_matching(self):
          self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
          self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
@@ -24,12 +33,17 @@ class TestAllURLsMatching(unittest.TestCase):
      def test_youtube_matching(self):
          self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
          self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
  
      def test_youtube_channel_matching(self):
          self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
          self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
          self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
  
+    def test_youtube_user_matching(self):
+        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
+
      def test_justin_tv_channelid_matching(self):
          self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
          self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
@@ -63,15 +77,12 @@ class TestAllURLsMatching(unittest.TestCase):
                      self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
  
      def test_keywords(self):
-        ies = gen_extractors()
-        matching_ies = lambda url: [ie.IE_NAME for ie in ies
-                                    if ie.suitable(url) and ie.IE_NAME != 'generic']
-        self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
-        self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
-        self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
+        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
+        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+        self.assertMatch(':thedailyshow', ['ComedyCentral'])
+        self.assertMatch(':tds', ['ComedyCentral'])
+        self.assertMatch(':colbertreport', ['ComedyCentral'])
+        self.assertMatch(':cr', ['ComedyCentral'])
  
  
  if __name__ == '__main__':
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 782cb1cfb5e596797e8e27da9b69ef87c5b9cee6..1facf1cc516ac0ea32f82fae3ffa8506f8764d8e 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -135,7 +135,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
      _VALID_URL = r"""^
                       (
                           (?:https?://)?                                       # http(s):// (optional)
-                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                              tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                           (?:                                                  # the various things that can precede the ID:
@@ -146,7 +146,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                   (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                   v=
                               )
-                         )?                                                   # optional -> youtube.com/xxxx is OK
+                         ))
+                         |youtu\.be/                                          # just youtu.be/xxxx
+                         )
                       )?                                                       # all until now is optional -> you can pass the naked ID
                       ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
                       (?(1).+)?                                                # if we found the ID, everything can follow
@@ -1013,13 +1015,17 @@ class YoutubeChannelIE(InfoExtractor):
  
  class YoutubeUserIE(InfoExtractor):
      IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)'
      _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
      _GDATA_PAGE_SIZE = 50
      _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
      _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
      IE_NAME = u'youtube:user'
  
+    def suitable(cls, url):
+        if YoutubeIE.suitable(url): return False
+        else: return super(YoutubeUserIE, cls).suitable(url)
+
      def _real_extract(self, url):
          # Extract username
          mobj = re.match(self._VALID_URL, url)
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Thu, 5 Sep 2013 20:38:23 +0000 (22:38 +0200)
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Thu, 5 Sep 2013 20:39:15 +0000 (22:39 +0200)
test/test_all_urls.py		patch | blob | history
youtube_dl/extractor/youtube.py		patch | blob | history