From fd76a14259ed4f9685e0cddae5a111ac8b4aa300 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 12:47:42 +0200
Subject: [PATCH] [extractor/common, downloader/ism] Extract SSTR subtitle
 tracks

_parse_ism_formats was extended into _parse_ism_formats_and_subtitles;
all direct users were updated, though _extract_ism_formats was left
as a compatibility wrapper.

The SSTR downloader was also modified in order to prepare for muxing
subtitle streams, although no support for any subtitle codecs was
added in this commit.
---
 yt_dlp/downloader/ism.py   |  38 ++++++++++----
 yt_dlp/extractor/common.py | 105 ++++++++++++++++++++++++-------------
 2 files changed, 98 insertions(+), 45 deletions(-)

diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py
index b3e22793b..103064df1 100644
--- a/yt_dlp/downloader/ism.py
+++ b/yt_dlp/downloader/ism.py
@@ -48,7 +48,7 @@ def write_piff_header(stream, params):
     language = params.get('language', 'und')
     height = params.get('height', 0)
     width = params.get('width', 0)
-    is_audio = width == 0 and height == 0
+    stream_type = params['stream_type']
     creation_time = modification_time = int(time.time())
 
     ftyp_payload = b'isml'  # major brand
@@ -77,7 +77,7 @@ def write_piff_header(stream, params):
     tkhd_payload += u32.pack(0) * 2  # reserved
     tkhd_payload += s16.pack(0)  # layer
     tkhd_payload += s16.pack(0)  # alternate group
-    tkhd_payload += s88.pack(1 if is_audio else 0)  # volume
+    tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0)  # volume
     tkhd_payload += u16.pack(0)  # reserved
     tkhd_payload += unity_matrix
     tkhd_payload += u1616.pack(width)
@@ -93,19 +93,35 @@ def write_piff_header(stream, params):
     mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload)  # Media Header Box
 
     hdlr_payload = u32.pack(0)  # pre defined
-    hdlr_payload += b'soun' if is_audio else b'vide'  # handler type
-    hdlr_payload += u32.pack(0) * 3  # reserved
-    hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0'  # name
+    if stream_type == 'audio':  # handler type
+        hdlr_payload += b'soun'
+        hdlr_payload += u32.pack(0) * 3  # reserved
+        hdlr_payload += b'SoundHandler\0'  # name
+    elif stream_type == 'video':
+        hdlr_payload += b'vide'
+        hdlr_payload += u32.pack(0) * 3  # reserved
+        hdlr_payload += b'VideoHandler\0'  # name
+    elif stream_type == 'text':
+        hdlr_payload += b'subt'
+        hdlr_payload += u32.pack(0) * 3  # reserved
+        hdlr_payload += b'SubtitleHandler\0'  # name
+    else:
+        assert False
     mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload)  # Handler Reference Box
 
-    if is_audio:
+    if stream_type == 'audio':
         smhd_payload = s88.pack(0)  # balance
         smhd_payload += u16.pack(0)  # reserved
         media_header_box = full_box(b'smhd', 0, 0, smhd_payload)  # Sound Media Header
-    else:
+    elif stream_type == 'video':
         vmhd_payload = u16.pack(0)  # graphics mode
         vmhd_payload += u16.pack(0) * 3  # opcolor
         media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload)  # Video Media Header
+    elif stream_type == 'text':
+        sthd_payload = u16.pack(0) * 2
+        media_header_box = full_box(b'sthd', 0, 1, sthd_payload)  # Subtitle Media Header
+    else:
+        assert False
     minf_payload = media_header_box
 
     dref_payload = u32.pack(1)  # entry count
@@ -117,7 +133,7 @@ def write_piff_header(stream, params):
 
     sample_entry_payload = u8.pack(0) * 6  # reserved
     sample_entry_payload += u16.pack(1)  # data reference index
-    if is_audio:
+    if stream_type == 'audio':
         sample_entry_payload += u32.pack(0) * 2  # reserved
         sample_entry_payload += u16.pack(params.get('channels', 2))
         sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
@@ -127,7 +143,7 @@ def write_piff_header(stream, params):
 
         if fourcc == 'AACL':
             sample_entry_box = box(b'mp4a', sample_entry_payload)
-    else:
+    elif stream_type == 'video':
         sample_entry_payload += u16.pack(0)  # pre defined
         sample_entry_payload += u16.pack(0)  # reserved
         sample_entry_payload += u32.pack(0) * 3  # pre defined
@@ -155,6 +171,10 @@ def write_piff_header(stream, params):
             avcc_payload += pps
             sample_entry_payload += box(b'avcC', avcc_payload)  # AVC Decoder Configuration Record
             sample_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
+        else:
+            assert False
+    else:
+        assert False
     stsd_payload += sample_entry_box
 
     stbl_payload = full_box(b'stsd', 0, 0, stsd_payload)  # Sample Description Box
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 0c56a9015..045d463d6 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2750,26 +2750,38 @@ class InfoExtractor(object):
                         else:
                             # Assuming direct URL to unfragmented media.
                             f['url'] = base_url
-                        formats.append(f)
+                        if content_type in ('video', 'audio'):
+                            formats.append(f)
+                        elif content_type == 'text':
+                            subtitles.setdefault(lang or 'und', []).append(f)
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats, subtitles
 
-    def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+    def _extract_ism_formats(self, *args, **kwargs):
+        fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self.report_warning(bug_reports_message(
+                "Ignoring subtitle tracks found in the ISM manifest; "
+                "if any subtitle tracks are missing,"
+            ))
+        return fmts
+
+    def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
         res = self._download_xml_handle(
             ism_url, video_id,
             note=note or 'Downloading ISM manifest',
             errnote=errnote or 'Failed to download ISM manifest',
             fatal=fatal, data=data, headers=headers, query=query)
         if res is False:
-            return []
+            return [], {}
         ism_doc, urlh = res
         if ism_doc is None:
-            return []
+            return [], {}
 
-        return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
+        return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
 
-    def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+    def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
         """
         Parse formats from ISM manifest.
         References:
@@ -2777,22 +2789,24 @@ class InfoExtractor(object):
             https://msdn.microsoft.com/en-us/library/ff469518.aspx
         """
         if ism_doc.get('IsLive') == 'TRUE':
-            return []
+            return [], {}
         if (not self._downloader.params.get('allow_unplayable_formats')
                 and ism_doc.find('Protection') is not None):
-            return []
+            return [], {}
 
         duration = int(ism_doc.attrib['Duration'])
         timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
 
         formats = []
+        subtitles = {}
         for stream in ism_doc.findall('StreamIndex'):
             stream_type = stream.get('Type')
-            if stream_type not in ('video', 'audio'):
+            if stream_type not in ('video', 'audio', 'text'):
                 continue
             url_pattern = stream.attrib['Url']
             stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
             stream_name = stream.get('Name')
+            stream_language = stream.get('Language', 'und')
             for track in stream.findall('QualityLevel'):
                 fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
                 # TODO: add support for WVC1 and WMAP
@@ -2839,33 +2853,52 @@ class InfoExtractor(object):
                     format_id.append(stream_name)
                 format_id.append(compat_str(tbr))
 
-                formats.append({
-                    'format_id': '-'.join(format_id),
-                    'url': ism_url,
-                    'manifest_url': ism_url,
-                    'ext': 'ismv' if stream_type == 'video' else 'isma',
-                    'width': width,
-                    'height': height,
-                    'tbr': tbr,
-                    'asr': sampling_rate,
-                    'vcodec': 'none' if stream_type == 'audio' else fourcc,
-                    'acodec': 'none' if stream_type == 'video' else fourcc,
-                    'protocol': 'ism',
-                    'fragments': fragments,
-                    '_download_params': {
-                        'duration': duration,
-                        'timescale': stream_timescale,
-                        'width': width or 0,
-                        'height': height or 0,
-                        'fourcc': fourcc,
-                        'codec_private_data': track.get('CodecPrivateData'),
-                        'sampling_rate': sampling_rate,
-                        'channels': int_or_none(track.get('Channels', 2)),
-                        'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
-                        'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
-                    },
-                })
-        return formats
+                if stream_type == 'text':
+                    subtitles.setdefault(stream_language, []).append({
+                        'ext': 'ismt',
+                        'protocol': 'ism',
+                        'url': ism_url,
+                        'manifest_url': ism_url,
+                        'fragments': fragments,
+                        '_download_params': {
+                            'stream_type': stream_type,
+                            'duration': duration,
+                            'timescale': stream_timescale,
+                            'fourcc': fourcc,
+                            'language': stream_language,
+                            'codec_private_data': track.get('CodecPrivateData'),
+                        }
+                    })
+                elif stream_type in ('video', 'audio'):
+                    formats.append({
+                        'format_id': '-'.join(format_id),
+                        'url': ism_url,
+                        'manifest_url': ism_url,
+                        'ext': 'ismv' if stream_type == 'video' else 'isma',
+                        'width': width,
+                        'height': height,
+                        'tbr': tbr,
+                        'asr': sampling_rate,
+                        'vcodec': 'none' if stream_type == 'audio' else fourcc,
+                        'acodec': 'none' if stream_type == 'video' else fourcc,
+                        'protocol': 'ism',
+                        'fragments': fragments,
+                        '_download_params': {
+                            'stream_type': stream_type,
+                            'duration': duration,
+                            'timescale': stream_timescale,
+                            'width': width or 0,
+                            'height': height or 0,
+                            'fourcc': fourcc,
+                            'language': stream_language,
+                            'codec_private_data': track.get('CodecPrivateData'),
+                            'sampling_rate': sampling_rate,
+                            'channels': int_or_none(track.get('Channels', 2)),
+                            'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+                            'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+                        },
+                    })
+        return formats, subtitles
 
     def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
         def absolute_url(item_url):