[rtve] Extract subtitles

This commit is contained in:
Jaime Marquínez Ferrándiz 2015-02-23 21:52:07 +01:00
parent 99209c2916
commit 25ac63ed71
2 changed files with 27 additions and 0 deletions

View File

@ -25,6 +25,7 @@ from youtube_dl.extractor import (
RaiIE, RaiIE,
VikiIE, VikiIE,
ThePlatformIE, ThePlatformIE,
RTVEALaCartaIE,
) )
@ -305,5 +306,19 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b') self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
class TestRtveSubtitles(BaseTestSubtitles):
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
IE = RTVEALaCartaIE
def test_allsubtitles(self):
print('Skipping, only available from Spain')
return
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['es']))
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -102,14 +102,26 @@ class RTVEALaCartaIE(InfoExtractor):
video_url = compat_urlparse.urljoin( video_url = compat_urlparse.urljoin(
'http://mvod1.akcdn.rtve.es/', video_path) 'http://mvod1.akcdn.rtve.es/', video_path)
subtitles = None
if info.get('sbtFile') is not None:
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
return { return {
'id': video_id, 'id': video_id,
'title': info['title'], 'title': info['title'],
'url': video_url, 'url': video_url,
'thumbnail': info.get('image'), 'thumbnail': info.get('image'),
'page_url': url, 'page_url': url,
'subtitles': subtitles,
} }
def _get_subtitles(self, video_id, sub_file):
subs = self._download_json(
sub_file + '.json', video_id,
'Downloading subtitles info')['page']['items']
return dict((s['lang'], [{'ext': 'vtt', 'url': s['src']}])
for s in subs)
class RTVELiveIE(InfoExtractor): class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live' IE_NAME = 'rtve.es:live'