From b6e0c7d2e3bb17b36a3b6e16fa8fd67092658d6c Mon Sep 17 00:00:00 2001
From: Unknown <blackjack4494@web.de>
Date: Fri, 9 Oct 2020 07:06:49 +0200
Subject: [PATCH] [mtv] fix mtv.com and more(?)

---
 youtube_dlc/extractor/mtv.py | 41 ++++++++++++++++++++++++++++++++++--
 youtube_dlc/utils.py         |  7 ++++++
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py
index fedd5f46b..88c5eda38 100644
--- a/youtube_dlc/extractor/mtv.py
+++ b/youtube_dlc/extractor/mtv.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_xpath,
+    compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
@@ -22,6 +23,7 @@ from ..utils import (
     unescapeHTML,
     update_url_query,
     url_basename,
+    get_domain,
     xpath_text,
 )
 
@@ -253,7 +255,39 @@ class MTVServicesInfoExtractor(InfoExtractor):
 
         return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
 
-    def _extract_mgid(self, webpage):
+    def _extract_new_triforce_mgid(self, webpage, url='', data_zone=None, video_id=None):
+        """Build an mgid for url from the site's triforce v8 manifest.
+
+        Returns None if the manifest or its item id is unavailable; raises
+        ExtractorError if no domain can be derived from url.
+        webpage and data_zone are accepted but not used.
+        """
+        domain = get_domain(url)
+        if domain is None:
+            raise ExtractorError('[%s] could not get domain' % self.IE_NAME, expected=True)
+
+        def fetch_manifest(page_url):
+            # the manifest is requested for the http:// form of the page url
+            enc_url = compat_urlparse.quote(page_url.replace('https://', 'http://'), safe='')
+            return self._download_json(
+                'https://%s/feeds/triforce/manifest/v8?url=%s' % (domain, enc_url),
+                video_id, fatal=False)
+
+        # the download is non-fatal, so the result is only ever read via try_get
+        manifest = fetch_manifest(url)
+        new_loc = try_get(manifest, lambda x: x['manifest']['newLocation'], compat_str)
+        if new_loc and try_get(manifest, lambda x: x['manifest']['type']) == 'redirect':
+            self.to_screen('Found a redirect. Downloading manifest from new location')
+            manifest = fetch_manifest(new_loc)
+
+        item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str)
+        if not item_id:
+            self.to_screen('Found no id!')
+            return
+        # 'episode' can be anything. 'content' is used often as well
+        return 'mgid:arc:episode:%s:%s' % (domain, item_id)
+
+    def _extract_mgid(self, webpage, url):
         try:
             # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
             # or http://media.mtvnservices.com/{mgid}
@@ -275,6 +309,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
             mgid = self._search_regex(
                 r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
 
+        if not mgid:
+            mgid = self._extract_new_triforce_mgid(webpage, url)
+
         if not mgid:
             mgid = self._extract_triforce_mgid(webpage)
 
@@ -283,7 +320,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
     def _real_extract(self, url):
         title = url_basename(url)
         webpage = self._download_webpage(url, title)
-        mgid = self._extract_mgid(webpage)
+        mgid = self._extract_mgid(webpage, url)
         videos_info = self._get_videos_info(mgid)
         return videos_info
 
diff --git a/youtube_dlc/utils.py b/youtube_dlc/utils.py
index 32b179c6f..54a4ea2aa 100644
--- a/youtube_dlc/utils.py
+++ b/youtube_dlc/utils.py
@@ -1984,6 +1984,7 @@ def get_elements_by_attribute(attribute, value, html, escape_value=True):
 
 class HTMLAttributeParser(compat_HTMLParser):
     """Trivial HTML parser to gather the attributes for a single element"""
+
     def __init__(self):
         self.attrs = {}
         compat_HTMLParser.__init__(self)
@@ -2378,6 +2379,7 @@ class GeoRestrictedError(ExtractorError):
     This exception may be thrown when a video is not available from your
     geographic location due to geographic restrictions imposed by a website.
     """
+
     def __init__(self, msg, countries=None):
         super(GeoRestrictedError, self).__init__(msg, expected=True)
         self.msg = msg
@@ -3558,6 +3560,11 @@ def remove_quotes(s):
     return s
 
 
+def get_domain(url):
+    """Return url's host without scheme and leading "www.", or None if no dotted host is found."""
+    return (re.findall(r'^(?:https?://)?(?:www\.)?([^\n/?#]+\.[^\n/?#]+)', url) or [None])[0]
+
+
 def url_basename(url):
     path = compat_urlparse.urlparse(url).path
     return path.strip('/').split('/')[-1]