[youtube] Move cache into its own module

This commit is contained in:
Philipp Hagemeister 2014-09-03 12:41:05 +02:00
parent 88fc294f7f
commit a0e07d3161
6 changed files with 166 additions and 63 deletions

59
test/test_cache.py Normal file
View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
import shutil
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from youtube_dl.cache import Cache
def _is_empty(d):
return not bool(os.listdir(d))
def _mkdir(d):
if not os.path.exists(d):
os.mkdir(d)
class TestCache(unittest.TestCase):
def setUp(self):
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
_mkdir(TESTDATA_DIR)
self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test')
self.tearDown()
def tearDown(self):
if os.path.exists(self.test_dir):
shutil.rmtree(self.test_dir)
def test_cache(self):
ydl = FakeYDL({
'cachedir': self.test_dir,
})
c = Cache(ydl)
obj = {'x': 1, 'y': ['ä', '\\a', True]}
self.assertEqual(c.load('test_cache', 'k'), None)
c.store('test_cache', 'k', obj)
self.assertEqual(c.load('test_cache', 'k2'), None)
self.assertFalse(_is_empty(self.test_dir))
self.assertEqual(c.load('test_cache', 'k'), obj)
self.assertEqual(c.load('test_cache', 'y'), None)
self.assertEqual(c.load('test_cache2', 'k'), None)
c.remove()
self.assertFalse(os.path.exists(self.test_dir))
self.assertEqual(c.load('test_cache', 'k'), None)
if __name__ == '__main__':
unittest.main()

View File

@ -57,6 +57,7 @@ from .utils import (
YoutubeDLHandler, YoutubeDLHandler,
prepend_extension, prepend_extension,
) )
from .cache import Cache
from .extractor import get_info_extractor, gen_extractors from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader from .downloader import get_suitable_downloader
from .postprocessor import FFmpegMergerPP from .postprocessor import FFmpegMergerPP
@ -133,7 +134,7 @@ class YoutubeDL(object):
daterange: A DateRange object, download only if the upload_date is in the range. daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file skip_download: Skip the actual download of the video file
cachedir: Location of the cache files in the filesystem. cachedir: Location of the cache files in the filesystem.
None to disable filesystem cache. False to disable filesystem cache.
noplaylist: Download single video instead of a playlist if in doubt. noplaylist: Download single video instead of a playlist if in doubt.
age_limit: An integer representing the user's age in years. age_limit: An integer representing the user's age in years.
Unsuitable videos for the given age are skipped. Unsuitable videos for the given age are skipped.
@ -195,6 +196,7 @@ class YoutubeDL(object):
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self._err_file = sys.stderr self._err_file = sys.stderr
self.params = params self.params = params
self.cache = Cache(self)
if params.get('bidi_workaround', False): if params.get('bidi_workaround', False):
try: try:

View File

@ -84,7 +84,6 @@ import optparse
import os import os
import random import random
import shlex import shlex
import shutil
import sys import sys
@ -96,7 +95,6 @@ from .utils import (
decodeOption, decodeOption,
get_term_width, get_term_width,
DownloadError, DownloadError,
get_cachedir,
MaxDownloadsReached, MaxDownloadsReached,
preferredencoding, preferredencoding,
read_batch_urls, read_batch_urls,
@ -518,10 +516,10 @@ def parseOpts(overrideArguments=None):
filesystem.add_option('--cookies', filesystem.add_option('--cookies',
dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
filesystem.add_option( filesystem.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', '--cache-dir', dest='cachedir', default=None, metavar='DIR',
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
filesystem.add_option( filesystem.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir', '--no-cache-dir', action='store_const', const=False, dest='cachedir',
help='Disable filesystem caching') help='Disable filesystem caching')
filesystem.add_option( filesystem.add_option(
'--rm-cache-dir', action='store_true', dest='rm_cachedir', '--rm-cache-dir', action='store_true', dest='rm_cachedir',
@ -872,20 +870,7 @@ def _real_main(argv=None):
# Remove cache dir # Remove cache dir
if opts.rm_cachedir: if opts.rm_cachedir:
if opts.cachedir is None: ydl.cache.remove()
ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
else:
if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir' % opts.cachedir)
retcode = 141
else:
ydl.to_screen(
u'Removing cache dir %s .' % opts.cachedir,
skip_eol=True)
if os.path.exists(opts.cachedir):
ydl.to_screen(u'.', skip_eol=True)
shutil.rmtree(opts.cachedir)
ydl.to_screen(u'.')
# Maybe do nothing # Maybe do nothing
if (len(all_urls) < 1) and (opts.load_info_filename is None): if (len(all_urls) < 1) and (opts.load_info_filename is None):

93
youtube_dl/cache.py Normal file
View File

@ -0,0 +1,93 @@
from __future__ import unicode_literals
import errno
import io
import json
import os
import re
import shutil
import traceback
from .utils import (
write_json_file,
)
class Cache(object):
def __init__(self, ydl):
self._ydl = ydl
def _get_root_dir(self):
res = self._ydl.params.get('cachedir')
if res is None:
cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, 'youtube-dl')
return os.path.expanduser(res)
def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[a-zA-Z0-9_-]+$', section)
assert re.match(r'^[a-zA-Z0-9_-]+$', key)
return os.path.join(
self._get_root_dir(), section, '%s.%s' % (key, dtype))
@property
def enabled(self):
return self._ydl.params.get('cachedir') is not False
def store(self, section, key, data, dtype='json'):
assert dtype in ('json',)
if not self.enabled:
return
fn = self._get_cache_fn(section, key, dtype)
try:
try:
os.makedirs(os.path.dirname(fn))
except OSError as ose:
if ose.errno != errno.EEXIST:
raise
write_json_file(data, fn)
except Exception:
tb = traceback.format_exc()
self._ydl.report_warning(
'Writing cache to %r failed: %s' % (fn, tb))
def load(self, section, key, dtype='json', default=None):
assert dtype in ('json',)
if not self.enabled:
return default
cache_fn = self._get_cache_fn(section, key, dtype)
try:
try:
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
return json.load(cachef)
except ValueError:
try:
file_size = os.path.getsize(cache_fn)
except (OSError, IOError) as oe:
file_size = str(oe)
self._ydl.report_warning(
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
except IOError:
pass # No cache available
return default
def remove(self):
if not self.enabled:
self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
return
cachedir = self._get_root_dir()
if not any((term in cachedir) for term in ('cache', 'tmp')):
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
self._ydl.to_screen(
'Removing cache dir %s .' % cachedir, skip_eol=True)
if os.path.exists(cachedir):
self._ydl.to_screen('.', skip_eol=True)
shutil.rmtree(cachedir)
self._ydl.to_screen('.')

View File

@ -1,7 +1,5 @@
# coding: utf-8 # coding: utf-8
import errno
import io
import itertools import itertools
import json import json
import os.path import os.path
@ -21,7 +19,6 @@ from ..utils import (
compat_str, compat_str,
clean_html, clean_html,
get_cachedir,
get_element_by_id, get_element_by_id,
get_element_by_attribute, get_element_by_attribute,
ExtractorError, ExtractorError,
@ -30,7 +27,6 @@ from ..utils import (
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
orderedSet, orderedSet,
write_json_file,
uppercase_escape, uppercase_escape,
) )
@ -435,26 +431,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
func_id = '%s_%s_%s' % ( func_id = '%s_%s_%s' % (
player_type, player_id, self._signature_cache_id(example_sig)) player_type, player_id, self._signature_cache_id(example_sig))
assert os.path.basename(func_id) == func_id assert os.path.basename(func_id) == func_id
cache_dir = get_cachedir(self._downloader.params)
cache_enabled = cache_dir is not None cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
if cache_enabled: if cache_spec is not None:
cache_fn = os.path.join(os.path.expanduser(cache_dir),
u'youtube-sigfuncs',
func_id + '.json')
try:
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
cache_spec = json.load(cachef)
return lambda s: u''.join(s[i] for i in cache_spec) return lambda s: u''.join(s[i] for i in cache_spec)
except IOError:
pass # No cache available
except ValueError:
try:
file_size = os.path.getsize(cache_fn)
except (OSError, IOError) as oe:
file_size = str(oe)
self._downloader.report_warning(
u'Cache %s failed (%s)' % (cache_fn, file_size))
if player_type == 'js': if player_type == 'js':
code = self._download_webpage( code = self._download_webpage(
@ -472,22 +452,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else: else:
assert False, 'Invalid player type %r' % player_type assert False, 'Invalid player type %r' % player_type
if cache_enabled: if cache_spec is None:
try:
test_string = u''.join(map(compat_chr, range(len(example_sig)))) test_string = u''.join(map(compat_chr, range(len(example_sig))))
cache_res = res(test_string) cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res] cache_spec = [ord(c) for c in cache_res]
try:
os.makedirs(os.path.dirname(cache_fn))
except OSError as ose:
if ose.errno != errno.EEXIST:
raise
write_json_file(cache_spec, cache_fn)
except Exception:
tb = traceback.format_exc()
self._downloader.report_warning(
u'Writing cache to %r failed: %s' % (cache_fn, tb))
self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec)
return res return res
def _print_sig_code(self, func, example_sig): def _print_sig_code(self, func, example_sig):

View File

@ -1076,12 +1076,6 @@ def intlist_to_bytes(xs):
return bytes(xs) return bytes(xs)
def get_cachedir(params={}):
cache_root = os.environ.get('XDG_CACHE_HOME',
os.path.expanduser('~/.cache'))
return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
# Cross-platform file locking # Cross-platform file locking
if sys.platform == 'win32': if sys.platform == 'win32':
import ctypes.wintypes import ctypes.wintypes