[youtube] Skip WebVTT in DASH manifest (#5297)

This commit is contained in:
Yen Chi Hsuan 2015-04-08 03:45:02 +08:00
parent a35099bd33
commit de5c545648
1 changed files with 35 additions and 27 deletions

View File

@ -788,33 +788,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
errnote='Could not download DASH manifest')
formats = []
for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
if url_el is None:
continue
format_id = r.attrib['id']
video_url = url_el.text
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
f = {
'format_id': format_id,
'url': video_url,
'width': int_or_none(r.attrib.get('width')),
'height': int_or_none(r.attrib.get('height')),
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
'filesize': filesize,
'fps': int_or_none(r.attrib.get('frameRate')),
}
try:
existing_format = next(
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
full_info = self._formats.get(format_id, {}).copy()
full_info.update(f)
formats.append(full_info)
else:
existing_format.update(f)
for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
mime_type = a.attrib.get('mimeType')
for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
if url_el is None:
continue
if mime_type == 'text/vtt':
# TODO implement WebVTT downloading
pass
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
format_id = r.attrib['id']
video_url = url_el.text
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
f = {
'format_id': format_id,
'url': video_url,
'width': int_or_none(r.attrib.get('width')),
'height': int_or_none(r.attrib.get('height')),
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
'filesize': filesize,
'fps': int_or_none(r.attrib.get('frameRate')),
}
try:
existing_format = next(
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
full_info = self._formats.get(format_id, {}).copy()
full_info.update(f)
formats.append(full_info)
else:
existing_format.update(f)
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
def _real_extract(self, url):