From 33da98f4933ddc54c944bae985cfcc7b53563208 Mon Sep 17 00:00:00 2001 From: Justsoos Date: Wed, 1 Feb 2017 21:30:01 +0800 Subject: [PATCH] [douyutv] Improve room id regex http://www.douyu.com/t/lpl source get extra '\' with "room_id\" (from js coding) --- youtube_dl/extractor/douyutv.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 2f3c5113e..911594413 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -18,7 +18,7 @@ from ..utils import ( class DouyuTVIE(InfoExtractor): IE_DESC = '斗鱼' - _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.douyutv.com/iseven', 'info_dict': { @@ -68,6 +68,10 @@ class DouyuTVIE(InfoExtractor): }, { 'url': 'http://www.douyu.com/xiaocang', 'only_matching': True, + }, { + # \"room_id\" + 'url': 'http://www.douyu.com/t/lpl', + 'only_matching': True, }] # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf @@ -82,7 +86,7 @@ class DouyuTVIE(InfoExtractor): else: page = self._download_webpage(url, video_id) room_id = self._html_search_regex( - r'"room_id"\s*:\s*(\d+),', page, 'room id') + r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') room = self._download_json( 'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,