Merge pull request #7309 from ThomasWaldmann/no-part-files-in-final-archive

remove part files from final archive
This commit is contained in:
TW 2023-02-03 01:33:47 +01:00 committed by GitHub
commit 9b7647c89d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 118 additions and 211 deletions

View File

@ -132,10 +132,10 @@ During a backup, a special checkpoint archive named ``<archive-name>.checkpoint`
is saved at every checkpoint interval (the default value for this is 30
minutes) containing all the data backed-up until that point.
This checkpoint archive is a valid archive,
but it is only a partial backup (not all files that you wanted to back up are
contained in it). Having it in the repo until a successful, full backup is
completed is useful because it references all the transmitted chunks up
This checkpoint archive is a valid archive, but it is only a partial backup
(not all files that you wanted to back up are contained in it and the last file
in it might be a partial file). Having it in the repo until a successful, full
backup is completed is useful because it references all the transmitted chunks up
to the checkpoint. This means that in case of an interruption, you only need to
retransfer the data since the last checkpoint.
@ -154,14 +154,12 @@ Once your backup has finished successfully, you can delete all
``<archive-name>.checkpoint`` archives. If you run ``borg prune``, it will
also care for deleting unneeded checkpoints.
Note: the checkpointing mechanism creates hidden, partial files in an archive,
so that checkpoints even work while a big file is being processed.
They are named ``<filename>.borg_part_<N>`` and all operations usually ignore
these files, but you can make them considered by giving the option
``--consider-part-files``. You usually only need that option if you are
really desperate (e.g. if you have no completed backup of that file and you'd
rather get a partial file extracted than nothing). You do **not** want to give
that option under any normal circumstances.
Note: the checkpointing mechanism may create a partial (truncated) last file
in a checkpoint archive named ``<filename>.borg_part``. Such partial files
won't be contained in the final archive.
This is done so that checkpoints work cleanly and promptly while a big
file is being processed.
How can I back up huge file(s) over an unstable connection?
------------------------------------------------------------
@ -171,10 +169,8 @@ Yes. For more details, see :ref:`checkpoints_parts`.
How can I restore huge file(s) over an unstable connection?
-----------------------------------------------------------
If you cannot manage to extract the whole big file in one go, you can extract
all the part files and manually concatenate them together.
For more details, see :ref:`checkpoints_parts`.
Try using ``borg mount`` and ``rsync`` (or a similar tool that supports
resuming a partial file copy from what's already copied).
How can I switch append-only mode on and off?
-----------------------------------------------------------------------------------------------------------------------------------

View File

@ -58,7 +58,6 @@ class Statistics:
self.output_json = output_json
self.iec = iec
self.osize = self.usize = self.nfiles = 0
self.osize_parts = self.usize_parts = self.nfiles_parts = 0
self.last_progress = 0 # timestamp when last progress was shown
self.files_stats = defaultdict(int)
self.chunking_time = 0.0
@ -66,15 +65,10 @@ class Statistics:
self.rx_bytes = 0
self.tx_bytes = 0
def update(self, size, unique, part=False):
if not part:
self.osize += size
if unique:
self.usize += size
else:
self.osize_parts += size
if unique:
self.usize_parts += size
def update(self, size, unique):
self.osize += size
if unique:
self.usize += size
def __add__(self, other):
if not isinstance(other, Statistics):
@ -83,9 +77,6 @@ class Statistics:
stats.osize = self.osize + other.osize
stats.usize = self.usize + other.usize
stats.nfiles = self.nfiles + other.nfiles
stats.osize_parts = self.osize_parts + other.osize_parts
stats.usize_parts = self.usize_parts + other.usize_parts
stats.nfiles_parts = self.nfiles_parts + other.nfiles_parts
stats.chunking_time = self.chunking_time + other.chunking_time
stats.hashing_time = self.hashing_time + other.hashing_time
for key in other.files_stats:
@ -134,20 +125,13 @@ Bytes sent to remote: {stats.tx_bytes}
}
def as_raw_dict(self):
return {
"size": self.osize,
"nfiles": self.nfiles,
"size_parts": self.osize_parts,
"nfiles_parts": self.nfiles_parts,
}
return {"size": self.osize, "nfiles": self.nfiles}
@classmethod
def from_raw_dict(cls, **kw):
self = cls()
self.osize = kw["size"]
self.nfiles = kw["nfiles"]
self.osize_parts = kw["size_parts"]
self.nfiles_parts = kw["nfiles_parts"]
return self
@property
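For orientation, a minimal usage sketch of the simplified accounting (the import path and the default constructor arguments are assumptions; the method behavior follows the code shown above):

# Sketch only; assumes Statistics is importable from borg.archive and that the
# constructor defaults suffice.
from borg.archive import Statistics

stats = Statistics()
stats.update(4096, unique=True)    # new, unique chunk: counts toward osize and usize
stats.update(4096, unique=False)   # duplicate chunk: counts toward osize only
stats.nfiles += 1
raw = stats.as_raw_dict()          # {"size": 8192, "nfiles": 1} -- no *_parts keys any more
restored = Statistics.from_raw_dict(**raw)
assert restored.osize == 8192 and restored.nfiles == 1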
@ -353,6 +337,7 @@ class ChunkBuffer:
self.chunks = []
self.key = key
self.chunker = get_chunker(*chunker_params, seed=self.key.chunk_seed, sparse=False)
self.saved_chunks_len = None
def add(self, item):
self.buffer.write(self.packer.pack(item.as_dict()))
@ -392,6 +377,18 @@ class ChunkBuffer:
def is_full(self):
return self.buffer.tell() > self.BUFFER_SIZE
def save_chunks_state(self):
# as we only append to self.chunks, remembering the current length is good enough
self.saved_chunks_len = len(self.chunks)
def restore_chunks_state(self):
scl = self.saved_chunks_len
assert scl is not None, "forgot to call save_chunks_state?"
tail_chunks = self.chunks[scl:]
del self.chunks[scl:]
self.saved_chunks_len = None
return tail_chunks
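The save/restore pair above works because self.chunks is append-only. A self-contained sketch of that pattern, using a hypothetical stand-in class rather than the real ChunkBuffer:

class AppendOnlyChunks:
    def __init__(self):
        self.chunks = []
        self.saved_len = None

    def save_state(self):
        # the list is append-only, so its current length fully describes the state
        self.saved_len = len(self.chunks)

    def restore_state(self):
        assert self.saved_len is not None, "forgot to call save_state?"
        tail = self.chunks[self.saved_len:]   # everything appended since save_state()
        del self.chunks[self.saved_len:]
        self.saved_len = None
        return tail

buf = AppendOnlyChunks()
buf.chunks += ["id-1", "id-2"]          # metadata stream chunks already flushed
buf.save_state()
buf.chunks.append("id-of-part-item")    # chunk holding only the part item
assert buf.restore_state() == ["id-of-part-item"]
assert buf.chunks == ["id-1", "id-2"]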
class CacheChunkBuffer(ChunkBuffer):
def __init__(self, cache, key, stats, chunker_params=ITEMS_CHUNKER_PARAMS):
@ -484,7 +481,6 @@ class Archive:
start=None,
start_monotonic=None,
end=None,
consider_part_files=False,
log_json=False,
iec=False,
):
@ -519,7 +515,6 @@ class Archive:
if end is None:
end = archive_ts_now()
self.end = end
self.consider_part_files = consider_part_files
self.pipeline = DownloadPipeline(self.repository, self.repo_objs)
self.create = create
if self.create:
@ -629,9 +624,6 @@ Duration: {0.duration}
return "Archive(%r)" % self.name
def item_filter(self, item, filter=None):
if not self.consider_part_files and "part" in item:
# this is a part(ial) file, we usually don't want to consider it.
return False
return filter(item) if filter else True
def iter_items(self, filter=None, preload=False):
@ -649,6 +641,15 @@ Duration: {0.duration}
stats.show_progress(item=item, dt=0.2)
self.items_buffer.add(item)
def prepare_checkpoint(self):
# we need to flush the archive metadata stream to repo chunks, so that
# we have the metadata stream chunks WITHOUT the part file item we add later.
# The part file item will then get into its own metadata stream chunk, which we
# can easily NOT include in the next checkpoint or the final archive.
self.items_buffer.flush(flush=True)
# remember the current state of self.chunks, which corresponds to the flushed chunks
self.items_buffer.save_chunks_state()
def write_checkpoint(self):
metadata = self.save(self.checkpoint_name)
# that .save() has committed the repo.
@ -660,6 +661,11 @@ Duration: {0.duration}
self.cache.chunk_decref(self.id, self.stats)
for id in metadata.item_ptrs:
self.cache.chunk_decref(id, self.stats)
# also get rid of that part item; we do not want it in the next checkpoint or the final archive
tail_chunks = self.items_buffer.restore_chunks_state()
# tail_chunks holds the tail of the archive items metadata stream, which is not needed for the next commit.
for id in tail_chunks:
self.cache.chunk_decref(id, self.stats)
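Taken together with prepare_checkpoint above, the intended sequence looks roughly like the following; the driver function is a hypothetical sketch (archive stands for an Archive instance as in this file), not code from this PR:

def checkpoint_with_part_item(archive, part_item):
    # 1. flush the metadata stream and remember its chunk count, so the part
    #    item lands in its own, later metadata chunk(s)
    archive.prepare_checkpoint()
    # 2. add the temporary item describing the partially processed file
    archive.add_item(part_item, show_progress=False)
    # 3. save the checkpoint archive; write_checkpoint then drops the refs to
    #    the checkpoint metadata and to the tail chunks carrying the part item,
    #    so neither the next checkpoint nor the final archive contains it
    archive.write_checkpoint()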
def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
name = name or self.name
@ -694,14 +700,7 @@ Duration: {0.duration}
# because borg info relies on them. so, either use the given stats (from args)
# or fall back to self.stats if it was not given.
stats = stats or self.stats
metadata.update(
{
"size": stats.osize,
"nfiles": stats.nfiles,
"size_parts": stats.osize_parts,
"nfiles_parts": stats.nfiles_parts,
}
)
metadata.update({"size": stats.osize, "nfiles": stats.nfiles})
metadata.update(additional_metadata or {})
metadata = ArchiveItem(metadata)
data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b"archive")
@ -751,12 +750,9 @@ Duration: {0.duration}
pi.finish()
stats = Statistics(iec=self.iec)
stats.usize = unique_size # the part files use same chunks as the full file
stats.usize = unique_size
stats.nfiles = self.metadata.nfiles
stats.osize = self.metadata.size
if self.consider_part_files:
stats.nfiles += self.metadata.nfiles_parts
stats.osize += self.metadata.size_parts
return stats
@contextmanager
@ -1038,9 +1034,9 @@ Duration: {0.duration}
error = True
return exception_ignored # must not return None here
def chunk_decref(id, stats, part=False):
def chunk_decref(id, stats):
try:
self.cache.chunk_decref(id, stats, wait=False, part=part)
self.cache.chunk_decref(id, stats, wait=False)
except KeyError:
cid = bin_to_hex(id)
raise ChunksIndexError(cid)
@ -1064,9 +1060,8 @@ Duration: {0.duration}
for item in unpacker:
item = Item(internal_dict=item)
if "chunks" in item:
part = not self.consider_part_files and "part" in item
for chunk_id, size in item.chunks:
chunk_decref(chunk_id, stats, part=part)
chunk_decref(chunk_id, stats)
except (TypeError, ValueError):
# if items metadata spans multiple chunks and one chunk got dropped somehow,
# it could be that unpacker yields bad types
@ -1234,10 +1229,22 @@ def cached_hash(chunk, id_hash):
class ChunksProcessor:
# Processes an iterator of chunks for an Item
def __init__(self, *, key, cache, add_item, write_checkpoint, checkpoint_interval, checkpoint_volume, rechunkify):
def __init__(
self,
*,
key,
cache,
add_item,
prepare_checkpoint,
write_checkpoint,
checkpoint_interval,
checkpoint_volume,
rechunkify,
):
self.key = key
self.cache = cache
self.add_item = add_item
self.prepare_checkpoint = prepare_checkpoint
self.write_checkpoint = write_checkpoint
self.rechunkify = rechunkify
# time interval based checkpointing
@ -1248,38 +1255,34 @@ class ChunksProcessor:
self.current_volume = 0
self.last_volume_checkpoint = 0
def write_part_file(self, item, from_chunk, number):
def write_part_file(self, item):
self.prepare_checkpoint()
item = Item(internal_dict=item.as_dict())
length = len(item.chunks)
# the item should only have the *additional* chunks we processed after the last partial item:
item.chunks = item.chunks[from_chunk:]
# for borg recreate, we already have a size member in the source item (giving the total file size),
# but we consider only a part of the file here, thus we must recompute the size from the chunks:
item.get_size(memorize=True, from_chunks=True)
item.path += ".borg_part_%d" % number
item.part = number
number += 1
item.path += ".borg_part"
self.add_item(item, show_progress=False)
self.write_checkpoint()
return length, number
def maybe_checkpoint(self, item, from_chunk, part_number, forced=False):
def maybe_checkpoint(self, item):
checkpoint_done = False
sig_int_triggered = sig_int and sig_int.action_triggered()
if (
forced
or sig_int_triggered
sig_int_triggered
or (self.checkpoint_interval and time.monotonic() - self.last_checkpoint > self.checkpoint_interval)
or (self.checkpoint_volume and self.current_volume - self.last_volume_checkpoint >= self.checkpoint_volume)
):
if sig_int_triggered:
logger.info("checkpoint requested: starting checkpoint creation...")
from_chunk, part_number = self.write_part_file(item, from_chunk, part_number)
self.write_part_file(item)
checkpoint_done = True
self.last_checkpoint = time.monotonic()
self.last_volume_checkpoint = self.current_volume
if sig_int_triggered:
sig_int.action_completed()
logger.info("checkpoint requested: finished checkpoint creation!")
return from_chunk, part_number
return checkpoint_done # whether a checkpoint archive was created
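A standalone sketch of the trigger logic above (hypothetical function and parameter names): a checkpoint is due when a signal requested one, when the configured time interval has elapsed, or when enough new data volume has been processed:

def checkpoint_due(now, last_checkpoint, checkpoint_interval,
                   current_volume, last_volume_checkpoint, checkpoint_volume,
                   sig_int_triggered=False):
    return bool(
        sig_int_triggered
        or (checkpoint_interval and now - last_checkpoint > checkpoint_interval)
        or (checkpoint_volume and current_volume - last_volume_checkpoint >= checkpoint_volume)
    )

# e.g. 1800 s interval, volume-based checkpointing disabled (0):
assert checkpoint_due(now=2000.0, last_checkpoint=0.0, checkpoint_interval=1800,
                      current_volume=0, last_volume_checkpoint=0, checkpoint_volume=0)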
def process_file_chunks(self, item, cache, stats, show_progress, chunk_iter, chunk_processor=None):
if not chunk_processor:
@ -1297,28 +1300,13 @@ class ChunksProcessor:
# to get rid of .chunks_healthy, as it might not correspond to .chunks any more.
if self.rechunkify and "chunks_healthy" in item:
del item.chunks_healthy
from_chunk = 0
part_number = 1
for chunk in chunk_iter:
cle = chunk_processor(chunk)
item.chunks.append(cle)
self.current_volume += cle[1]
if show_progress:
stats.show_progress(item=item, dt=0.2)
from_chunk, part_number = self.maybe_checkpoint(item, from_chunk, part_number, forced=False)
else:
if part_number > 1:
if item.chunks[from_chunk:]:
# if we already have created a part item inside this file, we want to put the final
# chunks (if any) into a part item also (so all parts can be concatenated to get
# the complete file):
from_chunk, part_number = self.maybe_checkpoint(item, from_chunk, part_number, forced=True)
# if we created part files, we have referenced all chunks from the part files,
# but we also will reference the same chunks also from the final, complete file:
for chunk in item.chunks:
cache.chunk_incref(chunk.id, stats, size=chunk.size, part=True)
stats.nfiles_parts += part_number - 1
self.maybe_checkpoint(item)
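A toy sketch of the simplified per-file loop (hypothetical helpers, not the borg API): each chunk is appended to the item and counted toward the checkpoint volume, with no from_chunk/part_number bookkeeping and no extra chunk increfs for part files:

def process_chunks(item, chunk_iter, store_chunk, maybe_checkpoint):
    volume = 0
    for data in chunk_iter:
        chunk_id, size = store_chunk(data)   # hypothetical: store the chunk, return (id, size)
        item["chunks"].append((chunk_id, size))
        volume += size
        maybe_checkpoint(item)               # may write a checkpoint archive
    return volume

item = {"path": "bigfile", "chunks": []}
total = process_chunks(item, [b"aa", b"bbb"],
                       store_chunk=lambda d: (hash(d), len(d)),
                       maybe_checkpoint=lambda it: None)
assert total == 5 and len(item["chunks"]) == 2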
class FilesystemObjectProcessors:
@ -2474,6 +2462,7 @@ class ArchiveRecreater:
cache=self.cache,
key=self.key,
add_item=target.add_item,
prepare_checkpoint=target.prepare_checkpoint,
write_checkpoint=target.write_checkpoint,
checkpoint_interval=self.checkpoint_interval,
checkpoint_volume=self.checkpoint_volume,

View File

@ -149,7 +149,6 @@ def with_repository(
progress=getattr(args, "progress", False),
lock_wait=self.lock_wait,
cache_mode=getattr(args, "files_cache_mode", FILES_CACHE_MODE_DISABLED),
consider_part_files=getattr(args, "consider_part_files", False),
iec=getattr(args, "iec", False),
) as cache_:
return method(self, args, repository=repository, cache=cache_, **kwargs)
@ -214,7 +213,6 @@ def with_other_repository(manifest=False, cache=False, compatibility=None):
progress=False,
lock_wait=self.lock_wait,
cache_mode=getattr(args, "files_cache_mode", FILES_CACHE_MODE_DISABLED),
consider_part_files=getattr(args, "consider_part_files", False),
iec=getattr(args, "iec", False),
) as cache_:
kwargs["other_cache"] = cache_
@ -240,7 +238,6 @@ def with_archive(method):
noacls=getattr(args, "noacls", False),
noxattrs=getattr(args, "noxattrs", False),
cache=kwargs.get("cache"),
consider_part_files=args.consider_part_files,
log_json=args.log_json,
iec=args.iec,
)
@ -542,12 +539,6 @@ def define_common_options(add_common_option):
type=int,
help="set network upload buffer size in MiB. (default: 0=no buffer)",
)
add_common_option(
"--consider-part-files",
dest="consider_part_files",
action="store_true",
help="treat part files like normal files (e.g. to list/extract them)",
)
add_common_option(
"--debug-profile",
metavar="FILE",

View File

@ -255,6 +255,7 @@ class CreateMixIn:
cache=cache,
key=key,
add_item=archive.add_item,
prepare_checkpoint=archive.prepare_checkpoint,
write_checkpoint=archive.write_checkpoint,
checkpoint_interval=args.checkpoint_interval,
checkpoint_volume=args.checkpoint_volume,

View File

@ -33,7 +33,7 @@ class DebugMixIn:
def do_debug_dump_archive_items(self, args, repository, manifest):
"""dump (decrypted, decompressed) archive items metadata (not: data)"""
repo_objs = manifest.repo_objs
archive = Archive(manifest, args.name, consider_part_files=args.consider_part_files)
archive = Archive(manifest, args.name)
for i, item_id in enumerate(archive.metadata.items):
_, data = repo_objs.parse(item_id, repository.get(item_id))
filename = "%06d_%s.items" % (i, bin_to_hex(item_id))

View File

@ -79,9 +79,7 @@ class DeleteMixIn:
logger_list.info(msg_delete.format(format_archive(archive_info), i, len(archive_names)))
if not dry_run:
archive = Archive(
manifest, archive_name, cache=cache, consider_part_files=args.consider_part_files
)
archive = Archive(manifest, archive_name, cache=cache)
archive.delete(stats, progress=args.progress, forced=args.forced)
checkpointed = self.maybe_checkpoint(
checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval

View File

@ -27,7 +27,7 @@ class DiffMixIn:
print_output = print_json_output if args.json_lines else print_text_output
archive1 = archive
archive2 = Archive(manifest, args.other_name, consider_part_files=args.consider_part_files)
archive2 = Archive(manifest, args.other_name)
can_compare_chunk_ids = (
archive1.metadata.get("chunker_params", False) == archive2.metadata.get("chunker_params", True)

View File

@ -24,9 +24,7 @@ class InfoMixIn:
output_data = []
for i, archive_name in enumerate(archive_names, 1):
archive = Archive(
manifest, archive_name, cache=cache, consider_part_files=args.consider_part_files, iec=args.iec
)
archive = Archive(manifest, archive_name, cache=cache, iec=args.iec)
info = archive.info()
if args.json:
output_data.append(info)

View File

@ -27,7 +27,7 @@ class ListMixIn:
format = "{mode} {user:6} {group:6} {size:8} {mtime} {path}{extra}{NL}"
def _list_inner(cache):
archive = Archive(manifest, args.name, cache=cache, consider_part_files=args.consider_part_files)
archive = Archive(manifest, args.name, cache=cache)
formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
for item in archive.iter_items(lambda item: matcher.match(item.path)):

View File

@ -142,7 +142,7 @@ class PruneMixIn:
else:
archives_deleted += 1
log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len)
archive = Archive(manifest, archive.name, cache, consider_part_files=args.consider_part_files)
archive = Archive(manifest, archive.name, cache)
archive.delete(stats, forced=args.forced)
checkpointed = self.maybe_checkpoint(
checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval

View File

@ -271,6 +271,7 @@ class TarMixIn:
cache=cache,
key=key,
add_item=archive.add_item,
prepare_checkpoint=archive.prepare_checkpoint,
write_checkpoint=archive.write_checkpoint,
checkpoint_interval=args.checkpoint_interval,
checkpoint_volume=args.checkpoint_volume,

View File

@ -89,6 +89,14 @@ class TransferMixIn:
archive = Archive(manifest, name, cache=cache, create=True) if not dry_run else None
upgrader.new_archive(archive=archive)
for item in other_archive.iter_items():
is_part = bool(item.get("part", False))
if is_part:
# borg 1.x created part files while checkpointing (in addition to the full
# file in the final archive), named <filename>.borg_part_<part> with item.part >= 1.
# borg2 archives do not have such special part items anymore,
# so drop them when transferring from old archives as well; no code is left
# that treats them specially (e.g. to get the stats right).
continue
if "chunks" in item:
chunks = []
for chunk_id, size in item.chunks:
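The skip rule above reduces to: items carrying the legacy part attribute are not copied into the new archive. A hypothetical sketch using plain dicts instead of Item objects:

def keep_item(item_dict):
    # borg 1.x part files carry "part" >= 1; regular items have no such key
    return not bool(item_dict.get("part", False))

assert keep_item({"path": "bigfile"})
assert not keep_item({"path": "bigfile.borg_part_3", "part": 3})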

View File

@ -404,7 +404,6 @@ class Cache:
lock_wait=None,
permit_adhoc_cache=False,
cache_mode=FILES_CACHE_MODE_DISABLED,
consider_part_files=False,
iec=False,
):
def local():
@ -417,11 +416,10 @@ class Cache:
iec=iec,
lock_wait=lock_wait,
cache_mode=cache_mode,
consider_part_files=consider_part_files,
)
def adhoc():
return AdHocCache(manifest=manifest, lock_wait=lock_wait, iec=iec, consider_part_files=consider_part_files)
return AdHocCache(manifest=manifest, lock_wait=lock_wait, iec=iec)
if not permit_adhoc_cache:
return local()
@ -464,14 +462,11 @@ Total chunks: {0.total_chunks}
# XXX: this should really be moved down to `hashindex.pyx`
total_size, unique_size, total_unique_chunks, total_chunks = self.chunks.summarize()
# the above values have the problem that they do not consider part files,
# thus the total_size might be too high (chunks referenced
# by the part files AND by the complete file).
# since borg 1.2 we have new archive metadata telling the total size per archive,
# so we can just sum up all archives to get the "all archives" stats:
total_size = 0
for archive_name in self.manifest.archives:
archive = Archive(self.manifest, archive_name, consider_part_files=self.consider_part_files)
archive = Archive(self.manifest, archive_name)
stats = archive.calc_stats(self, want_unique=False)
total_size += stats.osize
stats = self.Summary(total_size, unique_size, total_unique_chunks, total_chunks)._asdict()
@ -498,7 +493,6 @@ class LocalCache(CacheStatsMixin):
progress=False,
lock_wait=None,
cache_mode=FILES_CACHE_MODE_DISABLED,
consider_part_files=False,
iec=False,
):
"""
@ -515,7 +509,6 @@ class LocalCache(CacheStatsMixin):
self.repo_objs = manifest.repo_objs
self.progress = progress
self.cache_mode = cache_mode
self.consider_part_files = consider_part_files
self.timestamp = None
self.txn_active = False
@ -971,23 +964,23 @@ class LocalCache(CacheStatsMixin):
)
return refcount
def chunk_incref(self, id, stats, size=None, part=False):
def chunk_incref(self, id, stats, size=None):
if not self.txn_active:
self.begin_txn()
count, _size = self.chunks.incref(id)
stats.update(_size, False, part=part)
stats.update(_size, False)
return ChunkListEntry(id, _size)
def chunk_decref(self, id, stats, wait=True, part=False):
def chunk_decref(self, id, stats, wait=True):
if not self.txn_active:
self.begin_txn()
count, size = self.chunks.decref(id)
if count == 0:
del self.chunks[id]
self.repository.delete(id, wait=wait)
stats.update(-size, True, part=part)
stats.update(-size, True)
else:
stats.update(-size, False, part=part)
stats.update(-size, False)
def file_known_and_unchanged(self, hashed_path, path_hash, st):
"""
@ -1084,14 +1077,13 @@ All archives: unknown unknown unknown
Unique chunks Total chunks
Chunk index: {0.total_unique_chunks:20d} unknown"""
def __init__(self, manifest, warn_if_unencrypted=True, lock_wait=None, consider_part_files=False, iec=False):
def __init__(self, manifest, warn_if_unencrypted=True, lock_wait=None, iec=False):
CacheStatsMixin.__init__(self, iec=iec)
assert isinstance(manifest, Manifest)
self.manifest = manifest
self.repository = manifest.repository
self.key = manifest.key
self.repo_objs = manifest.repo_objs
self.consider_part_files = consider_part_files
self._txn_active = False
self.security_manager = SecurityManager(self.repository)
@ -1145,7 +1137,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
self.chunks[id] = entry._replace(size=size)
return entry.refcount
def chunk_incref(self, id, stats, size=None, part=False):
def chunk_incref(self, id, stats, size=None):
if not self._txn_active:
self.begin_txn()
count, _size = self.chunks.incref(id)
@ -1153,19 +1145,19 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
# size or add_chunk); we can't add references to those (size=0 is invalid) and generally don't try to.
size = _size or size
assert size
stats.update(size, False, part=part)
stats.update(size, False)
return ChunkListEntry(id, size)
def chunk_decref(self, id, stats, wait=True, part=False):
def chunk_decref(self, id, stats, wait=True):
if not self._txn_active:
self.begin_txn()
count, size = self.chunks.decref(id)
if count == 0:
del self.chunks[id]
self.repository.delete(id, wait=wait)
stats.update(-size, True, part=part)
stats.update(-size, True)
else:
stats.update(-size, False, part=part)
stats.update(-size, False)
def commit(self):
if not self._txn_active:

View File

@ -38,8 +38,6 @@ cache_sync_init(HashIndex *chunks)
unpack_init(&ctx->ctx);
/* needs to be set only once */
ctx->ctx.user.chunks = chunks;
ctx->ctx.user.parts.size = 0;
ctx->ctx.user.parts.num_files = 0;
ctx->ctx.user.totals.size = 0;
ctx->ctx.user.totals.num_files = 0;
ctx->buf = NULL;
@ -71,24 +69,12 @@ cache_sync_num_files_totals(const CacheSyncCtx *ctx)
return ctx->ctx.user.totals.num_files;
}
static uint64_t
cache_sync_num_files_parts(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.parts.num_files;
}
static uint64_t
cache_sync_size_totals(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.totals.size;
}
static uint64_t
cache_sync_size_parts(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.parts.size;
}
/**
* feed data to the cache synchronizer
* 0 = abort, 1 = continue

View File

@ -40,7 +40,7 @@
#endif
typedef struct unpack_user {
/* Item.chunks and Item.part are at the top level; we don't care about anything else,
/* Item.chunks is at the top level; we don't care about anything else,
* only need to track the current level to navigate arbitrary and unknown structure.
* To discern keys from everything else on the top level we use expect_map_item_end.
*/
@ -58,15 +58,12 @@ typedef struct unpack_user {
*/
int inside_chunks;
/* is this item a .part file (created for checkpointing inside files)? */
int part;
/* does this item have a chunks list in it? */
int has_chunks;
enum {
/* the next thing is a map key at the Item root level,
* and it might be the "chunks" or "part" key we're looking for */
* and it might be e.g. the "chunks" key we're looking for */
expect_map_key,
/* blocking state to expect_map_key
@ -114,11 +111,6 @@ typedef struct unpack_user {
uint64_t size, num_files;
} totals;
/* total sizes and files count coming from part files */
struct {
uint64_t size, num_files;
} parts;
} unpack_user;
struct unpack_context;
@ -317,7 +309,6 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n)
}
/* This begins a new Item */
u->expect = expect_map_key;
u->part = 0;
u->has_chunks = 0;
u->item.size = 0;
}
@ -358,10 +349,6 @@ static inline int unpack_callback_map_end(unpack_user* u)
if(u->level == 0) {
/* This ends processing of an Item */
if(u->has_chunks) {
if(u->part) {
u->parts.num_files += 1;
u->parts.size += u->item.size;
}
u->totals.num_files += 1;
u->totals.size += u->item.size;
}
@ -381,9 +368,6 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
u->expect = expect_chunks_begin;
u->inside_chunks = 1;
u->has_chunks = 1;
} else if(length == 4 && !memcmp("part", p, 4)) {
u->expect = expect_map_item_end;
u->part = 1;
} else {
u->expect = expect_map_item_end;
}

View File

@ -18,7 +18,9 @@ ARCHIVE_KEYS = frozenset(['version', 'name', 'hostname', 'username', 'time', 'ti
'command_line', 'recreate_command_line', # v2+ archives
'cmdline', 'recreate_cmdline', # legacy
'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', # used in 1.1.0b1 .. b2
'size', 'nfiles', 'size_parts', 'nfiles_parts'])
'size', 'nfiles',
'size_parts', 'nfiles_parts', # legacy v1 archives
])
# fmt: on
# this is the set of keys that are always present in archives:

View File

@ -147,7 +147,7 @@ class ItemCache:
else:
raise ValueError("Invalid entry type in self.meta")
def iter_archive_items(self, archive_item_ids, filter=None, consider_part_files=False):
def iter_archive_items(self, archive_item_ids, filter=None):
unpacker = msgpack.Unpacker()
# Current offset in the metadata stream, which consists of all metadata chunks glued together
@ -193,7 +193,7 @@ class ItemCache:
break
item = Item(internal_dict=item)
if filter and not filter(item) or not consider_part_files and "part" in item:
if filter and not filter(item):
msgpacked_bytes = b""
continue
@ -330,15 +330,13 @@ class FuseBackend:
"""Build FUSE inode hierarchy from archive metadata"""
self.file_versions = {} # for versions mode: original path -> version
t0 = time.perf_counter()
archive = Archive(self._manifest, archive_name, consider_part_files=self._args.consider_part_files)
archive = Archive(self._manifest, archive_name)
strip_components = self._args.strip_components
matcher = build_matcher(self._args.patterns, self._args.paths)
hlm = HardLinkManager(id_type=bytes, info_type=str) # hlid -> path
filter = build_filter(matcher, strip_components)
for item_inode, item in self.cache.iter_archive_items(
archive.metadata.items, filter=filter, consider_part_files=self._args.consider_part_files
):
for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter):
if strip_components:
item.path = os.sep.join(item.path.split(os.sep)[strip_components:])
path = os.fsencode(item.path)

View File

@ -79,11 +79,7 @@ class FuseVersionsIndex(IndexBase):
def __setitem__(self, key: bytes, value: Any) -> None: ...
class CacheSynchronizer:
csize_parts: int
csize_totals: int
num_files_parts: int
num_files_totals: int
size_parts: int
size_totals: int
num_files_totals: int
def __init__(self, chunks_index: Any) -> None: ...
def feed(self, chunk: bytes) -> None: ...

View File

@ -41,9 +41,7 @@ cdef extern from "cache_sync/cache_sync.c":
CacheSyncCtx *cache_sync_init(HashIndex *chunks)
const char *cache_sync_error(const CacheSyncCtx *ctx)
uint64_t cache_sync_num_files_totals(const CacheSyncCtx *ctx)
uint64_t cache_sync_num_files_parts(const CacheSyncCtx *ctx)
uint64_t cache_sync_size_totals(const CacheSyncCtx *ctx)
uint64_t cache_sync_size_parts(const CacheSyncCtx *ctx)
int cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length)
void cache_sync_free(CacheSyncCtx *ctx)
@ -630,14 +628,6 @@ cdef class CacheSynchronizer:
def num_files_totals(self):
return cache_sync_num_files_totals(self.sync)
@property
def num_files_parts(self):
return cache_sync_num_files_parts(self.sync)
@property
def size_totals(self):
return cache_sync_size_totals(self.sync)
@property
def size_parts(self):
return cache_sync_size_parts(self.sync)

View File

@ -91,10 +91,6 @@ class ArchiveItem(PropDict):
@csize.setter
def csize(self, val: int) -> None: ...
@property
def csize_parts(self) -> int: ...
@csize_parts.setter
def csize_parts(self, val: int) -> None: ...
@property
def items(self) -> List: ...
@items.setter
def items(self, val: List) -> None: ...

View File

@ -297,7 +297,7 @@ cdef class Item(PropDict):
deleted = PropDictProperty(bool)
nlink = PropDictProperty(int)
part = PropDictProperty(int)
part = PropDictProperty(int) # legacy only
def get_size(self, *, memorize=False, from_chunks=False, consider_ids=None):
"""
@ -516,8 +516,8 @@ cdef class ArchiveItem(PropDict):
recreate_partial_chunks = PropDictProperty(list) # list of tuples
size = PropDictProperty(int)
nfiles = PropDictProperty(int)
size_parts = PropDictProperty(int)
nfiles_parts = PropDictProperty(int)
size_parts = PropDictProperty(int) # legacy only
nfiles_parts = PropDictProperty(int) # legacy only
def update_internal(self, d):
# legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)

View File

@ -182,27 +182,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
)
# repo looking good overall? checks for rc == 0.
self.cmd(f"--repo={self.repository_location}", "check", "--debug")
# verify part files
out = self.cmd(
f"--repo={self.repository_location}",
"extract",
"test",
"stdin.borg_part_1",
"--consider-part-files",
"--stdout",
binary_output=True,
)
assert out == input_data[:chunk_size]
out = self.cmd(
f"--repo={self.repository_location}",
"extract",
"test",
"stdin.borg_part_2",
"--consider-part-files",
"--stdout",
binary_output=True,
)
assert out == input_data[: chunk_size - 1]
# verify that there are no part files in final archive
out = self.cmd(f"--repo={self.repository_location}", "list", "test")
assert "stdin.borg_part" not in out
# verify full file
out = self.cmd(f"--repo={self.repository_location}", "extract", "test", "stdin", "--stdout", binary_output=True)
assert out == input_data

View File

@ -74,7 +74,6 @@ class UpgraderFrom12To20:
"acl_access",
"acl_default",
"acl_extended",
"part",
}
if self.hlm.borg1_hardlink_master(item):