Fix for series list not getting populated with addic7ed.

2025-03-03 10:06:24 +00:00 · 2020-09-17 08:51:35 -04:00 · 2020-09-17 08:51:35 -04:00 · e0a62b9fe0
commit e0a62b9fe0
parent d5a1735212
1 changed file with 16 additions and 23 deletions
--- a/libs/subliminal_patch/providers/addic7ed.py
+++ b/libs/subliminal_patch/providers/addic7ed.py
@@ -21,8 +21,6 @@ from subzero.language import Language

 logger = logging.getLogger(__name__)

-show_cells_re = re.compile(b'<td class="(?:version|vr)">.*?</td>', re.DOTALL)
-
 #: Series header parsing regex
 series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$')

@@ -232,34 +230,29 @@ class Addic7edProvider(_Addic7edProvider):
        logger.info('Getting show ids')
        region.set(self.last_show_ids_fetch_key, datetime.datetime.now())

-        r = self.session.get(self.server_url + 'shows.php', timeout=10)
+        r = self.session.get(self.server_url, timeout=10)
        r.raise_for_status()

-        # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
-        # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
-        # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
-        show_cells = re.findall(show_cells_re, r.content)
-        if show_cells:
-            soup = ParserBeautifulSoup(b''.join(show_cells).decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
-        else:
-            # If RegEx fails, fall back to original r.text and use 'html.parser'
-            soup = ParserBeautifulSoup(r.text, ['html.parser'])
+        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
-        shows = soup.select('td > h3 > a[href^="/show/"]')
+        shows = soup.find(id='qsShow')
        for show in shows:
-            show_clean = sanitize(show.text, default_characters=self.sanitize_characters)
-            try:
-                show_id = int(show['href'][6:])
-            except ValueError:
-                continue
+            if hasattr(show, 'attrs'):
+                try:
+                    show_id = int(show.attrs['value'])
+                except ValueError:
+                    continue

-            show_ids[show_clean] = show_id
-            match = series_year_re.match(show_clean)
-            if match and match.group(2) and match.group(1) not in show_ids:
-                # year found, also add it without year
-                show_ids[match.group(1)] = show_id
+                if show_id != 0:
+                    show_clean = sanitize(show.text, default_characters=self.sanitize_characters)
+
+                    show_ids[show_clean] = show_id
+                    match = series_year_re.match(show_clean)
+                    if match and match.group(2) and match.group(1) not in show_ids:
+                        # year found, also add it without year
+                        show_ids[match.group(1)] = show_id

        soup.decompose()
        soup = None