[tapely] Add new extractor (closes #3861)
authorNaglis Jonaitis <njonaitis@gmail.com>
Wed, 1 Oct 2014 14:26:09 +0000 (17:26 +0300)
committerNaglis Jonaitis <njonaitis@gmail.com>
Wed, 1 Oct 2014 14:26:09 +0000 (17:26 +0300)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/tapely.py [new file with mode: 0644]

index ce1a2b32b4a95987694cee712404c34b22794c8c..e51ea701f68d3b850edcc188f4221dceb139b6dc 100644 (file)
@@ -355,6 +355,7 @@ from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
 from .tagesschau import TagesschauIE
+from .tapely import TapelyIE
 from .teachertube import (
     TeacherTubeIE,
     TeacherTubeUserIE,
diff --git a/youtube_dl/extractor/tapely.py b/youtube_dl/extractor/tapely.py
new file mode 100644 (file)
index 0000000..2c1178b
--- /dev/null
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    compat_urllib_request,
+    float_or_none,
+    parse_iso8601,
+)
+
+
+class TapelyIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tape\.ly/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
+    _API_URL = 'http://tape.ly/showtape?id={0:}'
+    _S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'
+    _TESTS = [
+        {
+            'url': 'http://tape.ly/my-grief-as-told-by-water',
+            'info_dict': {
+                'id': 23952,
+                'title': 'my grief as told by water',
+                'thumbnail': 're:^https?://.*\.png$',
+                'uploader_id': 16484,
+                'timestamp': 1411848286,
+                'description': 'For Robin and Ponkers, whom the tides of life have taken out to sea.',
+            },
+            'playlist_count': 13,
+        },
+        {
+            'url': 'http://tape.ly/my-grief-as-told-by-water/1',
+            'md5': '79031f459fdec6530663b854cbc5715c',
+            'info_dict': {
+                'id': 258464,
+                'title': 'Dreaming Awake  (My Brightest Diamond)',
+                'ext': 'm4a',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+
+        playlist_url = self._API_URL.format(display_id)
+        request = compat_urllib_request.Request(playlist_url)
+        request.add_header('X-Requested-With', 'XMLHttpRequest')
+        request.add_header('Accept', 'application/json')
+
+        playlist = self._download_json(request, display_id)
+
+        tape = playlist['tape']
+
+        entries = []
+        for s in tape['songs']:
+            song = s['song']
+            entry = {
+                'id': song['id'],
+                'duration': float_or_none(song.get('songduration'), 1000),
+                'title': song['title'],
+            }
+            if song['source'] == 'S3':
+                entry.update({
+                    'url': self._S3_SONG_URL.format(song['filename']),
+                })
+                entries.append(entry)
+            elif song['source'] == 'YT':
+                _, _, yt_id = song['filename'].split('/')
+                entry.update(self.url_result(yt_id, 'Youtube', video_id=yt_id))
+                entries.append(entry)
+            else:
+                self.report_warning('Unknown song source: %s' % song['source'])
+
+        if mobj.group('songnr'):
+            songnr = int(mobj.group('songnr')) - 1
+            try:
+                return entries[songnr]
+            except IndexError:
+                raise ExtractorError(
+                    'No song with index: %s' % mobj.group('songnr'),
+                    expected=True)
+
+        return {
+            '_type': 'playlist',
+            'id': tape['id'],
+            'display_id': display_id,
+            'title': tape['name'],
+            'entries': entries,
+            'thumbnail': tape.get('image_url'),
+            'description': clean_html(tape.get('subtext')),
+            'like_count': tape.get('likescount'),
+            'uploader_id': tape.get('user_id'),
+            'timestamp': parse_iso8601(tape.get('published_at')),
+        }