From 526b3b071632bc3c840ae4dd3579e015f41df6f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 23 Nov 2015 21:14:03 +0600 Subject: [PATCH] [youtube] Clarify ytplayer.config extraction rationale --- youtube_dl/extractor/youtube.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 824335d0a..5482aac3b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -898,6 +898,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_ytplayer_config(self, video_id, webpage): patterns = ( + # User data may contain arbitrary character sequences that may affect + # JSON extraction with regex, e.g. when '};' is contained the second + # regex won't capture the whole JSON. Yet working around by trying more + # concrete regex first keeping in mind proper quoted string handling + # to be implemented in future that will replace this workaround (see + # https://github.com/rg3/youtube-dl/issues/7468, + # https://github.com/rg3/youtube-dl/pull/7599) r';ytplayer\.config\s*=\s*({.+?});ytplayer', r';ytplayer\.config\s*=\s*({.+?});', )