mirror of https://github.com/borgbackup/borg.git
refactor new zero chunk handling to be reusable
This commit is contained in:
parent
6d0f9a52eb
commit
9fd284ce1a
|
@ -19,7 +19,7 @@ from .logger import create_logger
|
||||||
logger = create_logger()
|
logger = create_logger()
|
||||||
|
|
||||||
from . import xattr
|
from . import xattr
|
||||||
from .chunker import get_chunker, max_chunk_size, Chunk
|
from .chunker import get_chunker, max_chunk_size, Chunk, chunk_to_id_data
|
||||||
from .cache import ChunkListEntry
|
from .cache import ChunkListEntry
|
||||||
from .crypto.key import key_factory
|
from .crypto.key import key_factory
|
||||||
from .compress import Compressor, CompressionSpec
|
from .compress import Compressor, CompressionSpec
|
||||||
|
@ -43,7 +43,6 @@ from .helpers import msgpack
|
||||||
from .helpers import sig_int
|
from .helpers import sig_int
|
||||||
from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
|
from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
|
||||||
from .item import Item, ArchiveItem, ItemDiff
|
from .item import Item, ArchiveItem, ItemDiff
|
||||||
from .lrucache import LRUCache
|
|
||||||
from .platform import acl_get, acl_set, set_flags, get_flags, swidth, hostname
|
from .platform import acl_get, acl_set, set_flags, get_flags, swidth, hostname
|
||||||
from .remote import cache_if_remote
|
from .remote import cache_if_remote
|
||||||
from .repository import Repository, LIST_SCAN_LIMIT
|
from .repository import Repository, LIST_SCAN_LIMIT
|
||||||
|
@ -1105,8 +1104,6 @@ class ChunksProcessor:
|
||||||
self.checkpoint_interval = checkpoint_interval
|
self.checkpoint_interval = checkpoint_interval
|
||||||
self.last_checkpoint = time.monotonic()
|
self.last_checkpoint = time.monotonic()
|
||||||
self.rechunkify = rechunkify
|
self.rechunkify = rechunkify
|
||||||
self.zero_chunk_ids = LRUCache(10, dispose=lambda _: None) # length of all-zero chunk -> chunk_id
|
|
||||||
self.zeros = memoryview(bytes(MAX_DATA_SIZE))
|
|
||||||
|
|
||||||
def write_part_file(self, item, from_chunk, number):
|
def write_part_file(self, item, from_chunk, number):
|
||||||
item = Item(internal_dict=item.as_dict())
|
item = Item(internal_dict=item.as_dict())
|
||||||
|
@ -1139,20 +1136,7 @@ class ChunksProcessor:
|
||||||
def process_file_chunks(self, item, cache, stats, show_progress, chunk_iter, chunk_processor=None):
|
def process_file_chunks(self, item, cache, stats, show_progress, chunk_iter, chunk_processor=None):
|
||||||
if not chunk_processor:
|
if not chunk_processor:
|
||||||
def chunk_processor(chunk):
|
def chunk_processor(chunk):
|
||||||
allocation = chunk.meta['allocation']
|
chunk_id, data = chunk_to_id_data(chunk, self.key.id_hash)
|
||||||
if allocation == CH_DATA:
|
|
||||||
data = chunk.data
|
|
||||||
chunk_id = self.key.id_hash(data)
|
|
||||||
elif allocation in (CH_HOLE, CH_ALLOC):
|
|
||||||
size = chunk.meta['size']
|
|
||||||
data = self.zeros[:size]
|
|
||||||
try:
|
|
||||||
chunk_id = self.zero_chunk_ids[size]
|
|
||||||
except KeyError:
|
|
||||||
chunk_id = self.key.id_hash(data)
|
|
||||||
self.zero_chunk_ids[size] = chunk_id
|
|
||||||
else:
|
|
||||||
raise ValueError('unexpected allocation type')
|
|
||||||
chunk_entry = cache.add_chunk(chunk_id, data, stats, wait=False)
|
chunk_entry = cache.add_chunk(chunk_id, data, stats, wait=False)
|
||||||
self.cache.repository.async_response(wait=False)
|
self.cache.repository.async_response(wait=False)
|
||||||
return chunk_entry
|
return chunk_entry
|
||||||
|
|
|
@ -6,7 +6,8 @@ import errno
|
||||||
import os
|
import os
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
from .constants import CH_DATA, CH_ALLOC, CH_HOLE
|
from .constants import CH_DATA, CH_ALLOC, CH_HOLE, MAX_DATA_SIZE
|
||||||
|
from .lrucache import LRUCache
|
||||||
|
|
||||||
from libc.stdlib cimport free
|
from libc.stdlib cimport free
|
||||||
|
|
||||||
|
@ -52,6 +53,33 @@ def Chunk(data, **meta):
|
||||||
return _Chunk(meta, data)
|
return _Chunk(meta, data)
|
||||||
|
|
||||||
|
|
||||||
|
zeros = bytes(MAX_DATA_SIZE)
|
||||||
|
|
||||||
|
# remember a few recently used all-zero chunk hashes in this mapping.
|
||||||
|
# (hash_func, chunk_length) -> chunk_hash
|
||||||
|
# we play safe and have the hash_func in the mapping key, in case we
|
||||||
|
# have different hash_funcs within the same borg run.
|
||||||
|
zero_chunk_ids = LRUCache(10, dispose=lambda _: None)
|
||||||
|
|
||||||
|
def chunk_to_id_data(chunk, id_hash):
    """Return the ``(chunk_id, data)`` pair for *chunk*.

    The chunk's ``meta['allocation']`` value selects how the pair is built:

    - CH_DATA: ``data`` is ``chunk.data`` and the id is ``id_hash(data)``.
    - CH_HOLE / CH_ALLOC: ``chunk.data`` is not consulted; ``data`` is a
      memoryview over the first ``meta['size']`` bytes of the module-level
      ``zeros`` buffer. The id is taken from the ``zero_chunk_ids`` cache
      when present, otherwise computed with ``id_hash`` and stored there.

    :param chunk: object with a ``meta`` mapping and a ``data`` attribute
    :param id_hash: callable mapping a bytes-like object to its chunk id
    :return: tuple ``(chunk_id, data)``
    :raises ValueError: if the allocation type is not one of the above, or
        if an all-zero chunk is larger than the ``zeros`` buffer
    """
    allocation = chunk.meta['allocation']
    if allocation == CH_DATA:
        data = chunk.data
        chunk_id = id_hash(data)
    elif allocation in (CH_HOLE, CH_ALLOC):
        size = chunk.meta['size']
        # Slicing clamps silently to the buffer length, so an oversized request
        # would yield fewer zero bytes than the chunk claims to have. This must
        # be a real check (not an assert), as asserts are dropped under -O.
        if size > len(zeros):
            raise ValueError('all-zero chunk size %d exceeds maximum of %d bytes' % (size, len(zeros)))
        data = memoryview(zeros)[:size]
        # id_hash is part of the key so that ids computed by different hash
        # functions within the same run are never mixed up.
        cache_key = (id_hash, size)
        try:
            chunk_id = zero_chunk_ids[cache_key]
        except KeyError:
            chunk_id = id_hash(data)
            zero_chunk_ids[cache_key] = chunk_id
    else:
        raise ValueError('unexpected allocation type')
    return chunk_id, data
|
||||||
|
|
||||||
|
|
||||||
def dread(offset, size, fd=None, fh=-1):
|
def dread(offset, size, fd=None, fh=-1):
|
||||||
use_fh = fh >= 0
|
use_fh = fh >= 0
|
||||||
if use_fh:
|
if use_fh:
|
||||||
|
|
Loading…
Reference in New Issue