Respect age_limit when listing extractors (Fixes #4653)

This commit is contained in:
Philipp Hagemeister 2015-01-07 07:20:20 +01:00
parent 76b3c61012
commit 0590062925
8 changed files with 71 additions and 24 deletions

View File

@ -82,18 +82,8 @@ class FakeYDL(YoutubeDL):
def gettestcases(include_onlymatching=False): def gettestcases(include_onlymatching=False):
for ie in youtube_dl.extractor.gen_extractors(): for ie in youtube_dl.extractor.gen_extractors():
t = getattr(ie, '_TEST', None) for tc in ie.get_testcases(include_onlymatching):
if t: yield tc
assert not hasattr(ie, '_TESTS'), \
'%s has _TEST and _TESTS' % type(ie).__name__
tests = [t]
else:
tests = getattr(ie, '_TESTS', [])
for t in tests:
if not include_onlymatching and t.get('only_matching', False):
continue
t['name'] = type(ie).__name__[:-len('IE')]
yield t
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()

View File

@ -16,6 +16,7 @@ import json
import xml.etree.ElementTree import xml.etree.ElementTree
from youtube_dl.utils import ( from youtube_dl.utils import (
age_restricted,
args_to_str, args_to_str,
clean_html, clean_html,
DateRange, DateRange,
@ -402,5 +403,12 @@ Trying to open render node...
Success at /dev/dri/renderD128. Success at /dev/dri/renderD128.
ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
def test_age_restricted(self):
    """Check age_restricted() against a table of (content, policy) pairs."""
    # (content_limit, age_limit, expected "blocked" result)
    cases = (
        (None, 10, False),  # unrestricted content
        (1, None, False),   # unrestricted policy
        (8, 10, False),
        (18, 14, True),
        (18, 18, False),
    )
    for content_limit, age_limit, blocked in cases:
        check = self.assertTrue if blocked else self.assertFalse
        check(age_restricted(content_limit, age_limit))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -63,6 +63,7 @@ from .utils import (
YoutubeDLHandler, YoutubeDLHandler,
prepend_extension, prepend_extension,
args_to_str, args_to_str,
age_restricted,
) )
from .cache import Cache from .cache import Cache
from .extractor import get_info_extractor, gen_extractors from .extractor import get_info_extractor, gen_extractors
@ -550,13 +551,8 @@ class YoutubeDL(object):
max_views = self.params.get('max_views') max_views = self.params.get('max_views')
if max_views is not None and view_count > max_views: if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
age_limit = self.params.get('age_limit') if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
if age_limit is not None: return 'Skipping "%s" because it is age restricted' % title
actual_age_limit = info_dict.get('age_limit')
if actual_age_limit is None:
actual_age_limit = 0
if age_limit < actual_age_limit:
return 'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict): if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title return '%s has already been recorded in archive' % video_title
return None return None

View File

@ -38,7 +38,7 @@ from .update import update_self
from .downloader import ( from .downloader import (
FileDownloader, FileDownloader,
) )
from .extractor import gen_extractors from .extractor import list_extractors
from .YoutubeDL import YoutubeDL from .YoutubeDL import YoutubeDL
@ -95,17 +95,15 @@ def _real_main(argv=None):
_enc = preferredencoding() _enc = preferredencoding()
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
extractors = gen_extractors()
if opts.list_extractors: if opts.list_extractors:
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): for ie in list_extractors(opts.age_limit):
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
matchedUrls = [url for url in all_urls if ie.suitable(url)] matchedUrls = [url for url in all_urls if ie.suitable(url)]
for mu in matchedUrls: for mu in matchedUrls:
compat_print(' ' + mu) compat_print(' ' + mu)
sys.exit(0) sys.exit(0)
if opts.list_extractor_descriptions: if opts.list_extractor_descriptions:
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): for ie in list_extractors(opts.age_limit):
if not ie._WORKING: if not ie._WORKING:
continue continue
desc = getattr(ie, 'IE_DESC', ie.IE_NAME) desc = getattr(ie, 'IE_DESC', ie.IE_NAME)

View File

@ -560,6 +560,8 @@ from .zingmp3 import (
ZingMp3AlbumIE, ZingMp3AlbumIE,
) )
from ..utils import age_restricted
_ALL_CLASSES = [ _ALL_CLASSES = [
klass klass
for name, klass in globals().items() for name, klass in globals().items()
@ -575,6 +577,17 @@ def gen_extractors():
return [klass() for klass in _ALL_CLASSES] return [klass() for klass in _ALL_CLASSES]
def list_extractors(age_limit):
    """
    Return the extractors whose is_suitable(age_limit) check passes,
    ordered by lower-cased IE_NAME.
    """
    suitable = [ie for ie in gen_extractors() if ie.is_suitable(age_limit)]
    suitable.sort(key=lambda ie: ie.IE_NAME.lower())
    return suitable
def get_info_extractor(ie_name): def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name""" """Returns the info extractor class with the given ie_name"""
return globals()[ie_name + 'IE'] return globals()[ie_name + 'IE']

View File

@ -21,6 +21,7 @@ from ..compat import (
compat_str, compat_str,
) )
from ..utils import ( from ..utils import (
age_restricted,
clean_html, clean_html,
compiled_regex_type, compiled_regex_type,
ExtractorError, ExtractorError,
@ -877,6 +878,35 @@ class InfoExtractor(object):
None, '/', True, False, expire_time, '', None, None, None) None, '/', True, False, expire_time, '', None, None, None)
self._downloader.cookiejar.set_cookie(cookie) self._downloader.cookiejar.set_cookie(cookie)
def get_testcases(self, include_onlymatching=False):
    """Yield the test case dicts declared on this extractor.

    Test cases come from a truthy ``_TEST`` attribute (a single dict) or,
    failing that, from ``_TESTS`` (an iterable of dicts, default empty).
    Cases flagged ``only_matching`` are skipped unless
    *include_onlymatching* is true.  Each yielded dict gets a ``'name'``
    key set in place: the class name with its last two characters (the
    ``IE`` suffix) removed.
    """
    ie_class_name = type(self).__name__
    single_case = getattr(self, '_TEST', None)
    if single_case:
        # Declaring both attributes at once is treated as a programming error.
        assert not hasattr(self, '_TESTS'), \
            '%s has _TEST and _TESTS' % ie_class_name
        candidates = [single_case]
    else:
        candidates = getattr(self, '_TESTS', [])
    for case in candidates:
        if include_onlymatching or not case.get('only_matching', False):
            case['name'] = ie_class_name[:-len('IE')]
            yield case
def is_suitable(self, age_limit):
    """ Test whether the extractor is generally suitable for the given
    age limit (i.e. pornographic sites are not, all others usually are).

    The decision is derived from the extractor's own test cases
    (only_matching ones excluded): the extractor is suitable as soon as
    one test case is not blocked by age_restricted() for *age_limit*.
    An extractor without any test cases is considered suitable; one whose
    test cases are all blocked is not.
    """
    any_restricted = False
    for tc in self.get_testcases(include_onlymatching=False):
        # A playlist test case is judged by its first entry.  Use a
        # truthiness check so that an empty 'playlist' list falls back to
        # the test case's own info_dict instead of raising IndexError.
        if tc.get('playlist'):
            tc = tc['playlist'][0]
        is_restricted = age_restricted(
            tc.get('info_dict', {}).get('age_limit'), age_limit)
        if not is_restricted:
            return True
        # Reaching this point means the test case was blocked.
        any_restricted = True
    return not any_restricted
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """

View File

@ -95,6 +95,7 @@ class XTubeUserIE(InfoExtractor):
'url': 'http://www.xtube.com/community/profile.php?user=greenshowers', 'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
'info_dict': { 'info_dict': {
'id': 'greenshowers', 'id': 'greenshowers',
'age_limit': 18,
}, },
'playlist_mincount': 155, 'playlist_mincount': 155,
} }
@ -124,6 +125,7 @@ class XTubeUserIE(InfoExtractor):
return { return {
'_type': 'playlist', '_type': 'playlist',
'id': username, 'id': username,
'age_limit': 18,
'entries': [{ 'entries': [{
'_type': 'url', '_type': 'url',
'url': eurl, 'url': eurl,

View File

@ -1560,3 +1560,13 @@ def urlhandle_detect_ext(url_handle):
getheader = url_handle.info().getheader getheader = url_handle.info().getheader
return getheader('Content-Type').split("/")[1] return getheader('Content-Type').split("/")[1]
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # A missing policy (age_limit) or a missing content rating means
    # nothing is blocked.
    if age_limit is None or content_limit is None:
        return False
    return content_limit > age_limit