[youtube:ytsearch] Use the same system as the search webpage (fixes #5483)

The GData API v2 was deprecated and, according to http://youtube-eng.blogspot.com.es/2014/03/committing-to-youtube-data-api-v3_4.html, remained available until April 20, 2015.
This commit is contained in:
Jaime Marquínez Ferrándiz 2015-04-21 19:30:31 +02:00
parent cc38fa6cfb
commit b4c0806963
1 changed file with 26 additions and 25 deletions

View File

@@ -1458,54 +1458,55 @@ class YoutubeUserIE(YoutubeChannelIE):
return super(YoutubeUserIE, cls).suitable(url) return super(YoutubeUserIE, cls).suitable(url)
class YoutubeSearchIE(SearchInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
IE_DESC = 'YouTube.com searches' IE_DESC = 'YouTube.com searches'
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc' # there doesn't appear to be a real limit, for example if you search for
_MAX_RESULTS = 1000 # 'python' you get more than 8.000.000 results
_MAX_RESULTS = float('inf')
IE_NAME = 'youtube:search' IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch' _SEARCH_KEY = 'ytsearch'
_EXTRA_QUERY_ARGS = {}
def _get_n_results(self, query, n): def _get_n_results(self, query, n):
"""Get a specified number of results for a query""" """Get a specified number of results for a query"""
video_ids = [] videos = []
pagenum = 0
limit = n limit = n
PAGE_SIZE = 50
while (PAGE_SIZE * pagenum) < limit: for pagenum in itertools.count(1):
result_url = self._API_URL % ( url_query = {
compat_urllib_parse.quote_plus(query.encode('utf-8')), 'search_query': query,
max((PAGE_SIZE * pagenum) + 1), 2) 'page': pagenum,
data_json = self._download_webpage( 'spf': 'navigate',
}
url_query.update(self._EXTRA_QUERY_ARGS)
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
data = self._download_json(
result_url, video_id='query "%s"' % query, result_url, video_id='query "%s"' % query,
note='Downloading page %s' % (pagenum + 1), note='Downloading page %s' % pagenum,
errnote='Unable to download API page') errnote='Unable to download API page')
data = json.loads(data_json) html_content = data[1]['body']['content']
api_response = data['data']
if 'items' not in api_response: if 'class="search-message' in html_content:
raise ExtractorError( raise ExtractorError(
'[youtube] No video results', expected=True) '[youtube] No video results', expected=True)
new_ids = list(video['id'] for video in api_response['items']) new_videos = self._ids_to_results(orderedSet(re.findall(
video_ids += new_ids r'href="/watch\?v=(.{11})', html_content)))
videos += new_videos
if not new_videos or len(videos) > limit:
break
limit = min(n, api_response['totalItems']) if len(videos) > n:
pagenum += 1 videos = videos[:n]
if len(video_ids) > n:
video_ids = video_ids[:n]
videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(videos, query) return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE): class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date' IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
_SEARCH_KEY = 'ytsearchdate' _SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube.com searches, newest videos first' IE_DESC = 'YouTube.com searches, newest videos first'
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(InfoExtractor): class YoutubeSearchURLIE(InfoExtractor):