youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

freesound.py (2496B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from ..utils import (
      7     float_or_none,
      8     get_element_by_class,
      9     get_element_by_id,
     10     unified_strdate,
     11 )
     12 
     13 
     14 class FreesoundIE(InfoExtractor):
     15     _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/[^/]+/sounds/(?P<id>[^/]+)'
     16     _TEST = {
     17         'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
     18         'md5': '12280ceb42c81f19a515c745eae07650',
     19         'info_dict': {
     20             'id': '194503',
     21             'ext': 'mp3',
     22             'title': 'gulls in the city.wav',
     23             'description': 'the sounds of seagulls in the city',
     24             'duration': 130.233,
     25             'uploader': 'miklovan',
     26             'upload_date': '20130715',
     27             'tags': list,
     28         }
     29     }
     30 
     31     def _real_extract(self, url):
     32         audio_id = self._match_id(url)
     33 
     34         webpage = self._download_webpage(url, audio_id)
     35 
     36         audio_url = self._og_search_property('audio', webpage, 'song url')
     37         title = self._og_search_property('audio:title', webpage, 'song title')
     38 
     39         description = self._html_search_regex(
     40             r'(?s)id=["\']sound_description["\'][^>]*>(.+?)</div>',
     41             webpage, 'description', fatal=False)
     42 
     43         duration = float_or_none(
     44             get_element_by_class('duration', webpage), scale=1000)
     45 
     46         upload_date = unified_strdate(get_element_by_id('sound_date', webpage))
     47         uploader = self._og_search_property(
     48             'audio:artist', webpage, 'uploader', fatal=False)
     49 
     50         channels = self._html_search_regex(
     51             r'Channels</dt><dd>(.+?)</dd>', webpage,
     52             'channels info', fatal=False)
     53 
     54         tags_str = get_element_by_class('tags', webpage)
     55         tags = re.findall(r'<a[^>]+>([^<]+)', tags_str) if tags_str else None
     56 
     57         audio_urls = [audio_url]
     58 
     59         LQ_FORMAT = '-lq.mp3'
     60         if LQ_FORMAT in audio_url:
     61             audio_urls.append(audio_url.replace(LQ_FORMAT, '-hq.mp3'))
     62 
     63         formats = [{
     64             'url': format_url,
     65             'format_note': channels,
     66             'quality': quality,
     67         } for quality, format_url in enumerate(audio_urls)]
     68         self._sort_formats(formats)
     69 
     70         return {
     71             'id': audio_id,
     72             'title': title,
     73             'description': description,
     74             'duration': duration,
     75             'uploader': uploader,
     76             'upload_date': upload_date,
     77             'tags': tags,
     78             'formats': formats,
     79         }