1
0
Fork 0
mirror of https://github.com/blackjack4494/yt-dlc.git synced 2024-12-28 02:39:00 +00:00

Don't accept '>' inside the content attribute in OpenGraph regexes

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-11-15 12:54:13 +01:00
parent ab2d524780
commit 78fb87b283

View file

@@ -316,10 +316,12 @@ class InfoExtractor(object):
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        esc_prop = re.escape(prop)
+        content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+        property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
+        template = r'<meta[^>]+?%s[^>]+?%s'
         return [
-            r'<meta[^>]+?property=[\'"]og:%s[\'"][^>]+?content=(?:"(.+?)"|\'(.+?)\')' % esc_prop,
-            r'<meta[^>]+?content=(?:"(.+?)"|\'(.+?)\')[^>]+?property=[\'"]og:%s[\'"]' % esc_prop,
+            template % (property_re, content_re),
+            template % (content_re, property_re),
         ]
 
     def _og_search_property(self, prop, html, name=None, **kargs):