Respect age_limit when listing extractors (Fixes #4653)

2015-01-07 07:20:20 +01:00 · 2015-01-07 07:20:20 +01:00 · 0590062925
parent 76b3c61012
commit 0590062925
8 changed files with 71 additions and 24 deletions
--- a/test/helper.py
+++ b/test/helper.py
@ -82,18 +82,8 @@ class FakeYDL(YoutubeDL):
 def gettestcases(include_onlymatching=False):
    for ie in youtube_dl.extractor.gen_extractors():
-        t = getattr(ie, '_TEST', None)
+        for tc in ie.get_testcases(include_onlymatching):
-        if t:
+            yield tc
            assert not hasattr(ie, '_TESTS'), \
                '%s has _TEST and _TESTS' % type(ie).__name__
            tests = [t]
        else:
            tests = getattr(ie, '_TESTS', [])
        for t in tests:
            if not include_onlymatching and t.get('only_matching', False):
                continue
            t['name'] = type(ie).__name__[:-len('IE')]
            yield t
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -16,6 +16,7 @@ import json
 import xml.etree.ElementTree
 from youtube_dl.utils import (
    age_restricted,
    args_to_str,
    clean_html,
    DateRange,
@ -402,5 +403,12 @@ Trying to open render node...
 Success at /dev/dri/renderD128.
 ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
    def test_age_restricted(self):
        # Each row: content limit, viewer policy, and the assertion the
        # verdict of age_restricted() has to satisfy.
        cases = (
            (None, 10, self.assertFalse),  # unrestricted content
            (1, None, self.assertFalse),  # unrestricted policy
            (8, 10, self.assertFalse),
            (18, 14, self.assertTrue),
            (18, 18, self.assertFalse),
        )
        for content_limit, age_limit, check in cases:
            check(age_restricted(content_limit, age_limit))
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -63,6 +63,7 @@ from .utils import (
    YoutubeDLHandler,
    prepend_extension,
    args_to_str,
    age_restricted,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
@ -550,13 +551,8 @@ class YoutubeDL(object):
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
-        age_limit = self.params.get('age_limit')
+        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
-        if age_limit is not None:
+            return 'Skipping "%s" because it is age restricted' % title
            actual_age_limit = info_dict.get('age_limit')
            if actual_age_limit is None:
                actual_age_limit = 0
            if age_limit < actual_age_limit:
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
        return None
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -38,7 +38,7 @@ from .update import update_self
 from .downloader import (
    FileDownloader,
 )
-from .extractor import gen_extractors
+from .extractor import list_extractors
 from .YoutubeDL import YoutubeDL
@ -95,17 +95,15 @@ def _real_main(argv=None):
    _enc = preferredencoding()
    all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
    extractors = gen_extractors()
    if opts.list_extractors:
-        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+        for ie in list_extractors(opts.age_limit):
            compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            for mu in matchedUrls:
                compat_print('  ' + mu)
        sys.exit(0)
    if opts.list_extractor_descriptions:
-        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+        for ie in list_extractors(opts.age_limit):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -560,6 +560,8 @@ from .zingmp3 import (
    ZingMp3AlbumIE,
 )
 from ..utils import age_restricted
 _ALL_CLASSES = [
    klass
    for name, klass in globals().items()
@ -575,6 +577,17 @@ def gen_extractors():
    return [klass() for klass in _ALL_CLASSES]
 def list_extractors(age_limit):
    """
    Build the extractor listing for a viewer with the given age limit.

    Every extractor produced by gen_extractors() is asked whether it
    is_suitable(age_limit); the ones that are get returned as a list
    ordered by their lowercased IE_NAME.
    """
    suitable = [ie for ie in gen_extractors() if ie.is_suitable(age_limit)]
    suitable.sort(key=lambda ie: ie.IE_NAME.lower())
    return suitable
 def get_info_extractor(ie_name):
    """Look up an info extractor class by its name without the 'IE' suffix.

    The class is fetched from this module's globals under ie_name + 'IE';
    a name with no matching global raises KeyError.
    """
    class_name = ie_name + 'IE'
    return globals()[class_name]
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -21,6 +21,7 @@ from ..compat import (
    compat_str,
 )
 from ..utils import (
    age_restricted,
    clean_html,
    compiled_regex_type,
    ExtractorError,
@ -877,6 +878,35 @@ class InfoExtractor(object):
            None, '/', True, False, expire_time, '', None, None, None)
        self._downloader.cookiejar.set_cookie(cookie)
    def get_testcases(self, include_onlymatching=False):
        """Yield the test case dicts declared on this extractor.

        A truthy _TEST attribute is treated as the single test case (and
        must not be combined with _TESTS); otherwise the _TESTS list is
        used, defaulting to no tests at all.  Test cases flagged
        'only_matching' are skipped unless include_onlymatching is set.
        Every yielded dict gets its 'name' key set in place to the class
        name with the last two characters (the 'IE' suffix) cut off.
        """
        ie_class_name = type(self).__name__
        single_test = getattr(self, '_TEST', None)
        if single_test:
            assert not hasattr(self, '_TESTS'), \
                '%s has _TEST and _TESTS' % ie_class_name
            candidates = [single_test]
        else:
            candidates = getattr(self, '_TESTS', [])

        test_name = ie_class_name[:-len('IE')]
        for testcase in candidates:
            if include_onlymatching or not testcase.get('only_matching', False):
                # Note: this writes into the dict stored on the class.
                testcase['name'] = test_name
                yield testcase
    def is_suitable(self, age_limit):
        """ Test whether the extractor is generally suitable for the given
        age limit (i.e. pornographic sites are not, all others usually are).

        The verdict is derived from the extractor's own test cases (those
        flagged only_matching are ignored): as soon as one of them is not
        age-restricted for age_limit the extractor counts as suitable.
        Returns False only if test cases exist and every one of them is
        restricted; an extractor without test cases is considered suitable.
        """
        any_restricted = False
        for tc in self.get_testcases(include_onlymatching=False):
            # A playlist test is judged by its first entry.  An empty or
            # missing 'playlist' falls back to the test case itself rather
            # than failing with IndexError on tc['playlist'][0].
            if tc.get('playlist'):
                tc = tc['playlist'][0]
            is_restricted = age_restricted(
                tc.get('info_dict', {}).get('age_limit'), age_limit)
            if not is_restricted:
                return True
            # Reaching this point means the test case was restricted.
            any_restricted = True
        return not any_restricted
 class SearchInfoExtractor(InfoExtractor):
    """
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@ -95,6 +95,7 @@ class XTubeUserIE(InfoExtractor):
        'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
        'info_dict': {
            'id': 'greenshowers',
            'age_limit': 18,
        },
        'playlist_mincount': 155,
    }
@ -124,6 +125,7 @@ class XTubeUserIE(InfoExtractor):
        return {
            '_type': 'playlist',
            'id': username,
            'age_limit': 18,
            'entries': [{
                '_type': 'url',
                'url': eurl,
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1560,3 +1560,13 @@ def urlhandle_detect_ext(url_handle):
        getheader = url_handle.info().getheader
    return getheader('Content-Type').split("/")[1]
 def age_restricted(content_limit, age_limit):
    """ Tell whether content must be blocked under the configured age limit.

    content_limit is the age limit attached to the content (None means it
    is available for everyone); age_limit is the configured limit (None
    means no limit is set).  Returns True iff both are given and age_limit
    is lower than content_limit.
    """
    limit_missing = age_limit is None or content_limit is None
    return False if limit_missing else age_limit < content_limit