googlesearch.py (1692B)
from __future__ import unicode_literals

import itertools
import re

from .common import SearchInfoExtractor


class GoogleSearchIE(SearchInfoExtractor):
    """Search extractor that pages through Google's video search results."""

    IE_DESC = 'Google Video search'
    _MAX_RESULTS = 1000
    IE_NAME = 'video.google:search'
    _SEARCH_KEY = 'gvsearch'
    _TEST = {
        'url': 'gvsearch15:python language',
        'info_dict': {
            'id': 'python language',
            'title': 'python language',
        },
        'playlist_count': 15,
    }

    def _get_n_results(self, query, n):
        """Collect up to ``n`` search hits for ``query`` as a playlist dict.

        Result pages are requested one after another (the ``start`` offset
        advances by 10 per page) until at least ``n`` hits were gathered or
        the page carries no ``pnnext`` element. The returned dict has the
        keys ``_type``, ``id``, ``title`` and ``entries``; ``entries`` is
        truncated to at most ``n`` items of type ``url``.
        """

        collected = []

        for page_index in itertools.count():
            page_html = self._download_webpage(
                'http://www.google.com/search',
                'gvsearch:' + query,
                note='Downloading result page %s' % (page_index + 1),
                query={
                    'tbm': 'vid',
                    'q': query,
                    'start': page_index * 10,
                    'hl': 'en',
                })

            result_links = re.finditer(
                r'<h3 class="r"><a href="([^"]+)"', page_html)

            # Positions are 1-based so they line up with the "vidthumbN" ids.
            # Skip playlists: only hits with a matching thumbnail id are kept.
            collected.extend(
                {
                    '_type': 'url',
                    'url': link.group(1)
                }
                for position, link in enumerate(result_links, 1)
                if re.search(r'id="vidthumb%d"' % position, page_html))

            # Keep paging only while more hits are wanted and a "next" link
            # is present on the current page.
            if len(collected) < n and re.search(r'id="pnnext"', page_html):
                continue

            return {
                '_type': 'playlist',
                'id': query,
                'title': query,
                'entries': collected[:n],
            }