borg create-from-tar

This commit is contained in:
Marian Beermann 2017-02-28 20:38:34 +01:00 committed by Thomas Waldmann
parent 97fa80dd09
commit c14197e559
1 changed files with 197 additions and 0 deletions

View File

@ -1700,6 +1700,126 @@ class Archiver:
cache.commit() cache.commit()
return self.exit_code return self.exit_code
@with_repository(cache=True, exclusive=True)
def do_create_from_tar(self, args, repository, manifest, key, cache):
self.output_filter = args.output_filter
self.output_list = args.output_list
filter = None
if args.tar_filter == 'auto' and args.tarfile != '-':
if args.tarfile.endswith('.tar.gz'):
filter = 'gunzip'
elif args.tarfile.endswith('.tar.bz2'):
filter = 'bunzip2'
elif args.tarfile.endswith('.tar.xz'):
filter = 'unxz'
logger.debug('Automatically determined tar filter: %s', filter)
else:
filter = args.tar_filter
if args.tarfile == '-':
tarstream, tarstream_close = sys.stdin, False
else:
tarstream, tarstream_close = open(args.tarfile, 'rb'), True
if filter:
filterin = tarstream
filterin_close = tarstream_close
filterproc = subprocess.Popen(shlex.split(filter), stdin=filterin.fileno(), stdout=subprocess.PIPE)
# We can't deadlock. Why? Because we just give the FD of the stream to the filter process,
# it can read as much data as it wants!
tarstream = filterproc.stdout
tarstream_close = False
tar = tarfile.open(fileobj=tarstream, mode='r|')
self._create_from_tar(args, repository, manifest, key, cache, tar)
if filter:
logger.debug('Done creating archive, waiting for filter to die...')
rc = filterproc.wait()
logger.debug('filter exited with code %d', rc)
self.exit_code = max(self.exit_code, rc)
if filterin_close:
filterin.close()
if tarstream_close:
tarstream.close()
def _create_from_tar(self, args, repository, manifest, key, cache, tar):
def tarinfo_to_item(tarinfo, type=0):
return Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname, group=tarinfo.gname,
mtime=tarinfo.mtime * 1000**3)
t0 = datetime.utcnow()
t0_monotonic = time.monotonic()
archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
create=True, checkpoint_interval=args.checkpoint_interval,
progress=args.progress,
chunker_params=args.chunker_params, start=t0, start_monotonic=t0_monotonic,
compression=args.compression, compression_files=args.compression_files,
log_json=args.log_json)
while True:
tarinfo = tar.next()
status = '?'
if not tarinfo:
break
if tarinfo.isreg():
status = 'A'
fd = tar.extractfile(tarinfo)
item = tarinfo_to_item(tarinfo, stat.S_IFREG)
compress = archive.compression_decider1.decide(tarinfo.name)
archive.file_compression_logger.debug('%s -> compression %s', tarinfo.name, compress['name'])
archive.chunk_file(item, cache, archive.stats, backup_io_iter(archive.chunker.chunkify(fd)), compress=compress)
item.get_size(memorize=True)
archive.stats.nfiles += 1
elif tarinfo.isdir():
status = 'd'
item = tarinfo_to_item(tarinfo, stat.S_IFDIR)
archive.add_item(item)
elif tarinfo.issym():
status = 's'
item = tarinfo_to_item(tarinfo, stat.S_IFLNK)
item.source = tarinfo.linkname
elif tarinfo.islnk():
# tar uses the same hardlink model as borg (rather vice versa); the first instance of a hardlink
# is stored as a regular file, later instances are special entries referencing back to the
# first instance.
status = 'h'
item = tarinfo_to_item(tarinfo, stat.S_IFREG)
item.source = tarinfo.linkname
else:
self.print_warning('%s: Unsupported tar type %s', tarinfo.name, tarinfo.type)
self.print_file_status('E', tarinfo.name)
continue
self.print_file_status(status, tarinfo.name)
archive.add_item(item)
self._cft_save_archive(args, archive)
def _cft_save_archive(self, args, archive):
archive.save(comment=args.comment, timestamp=args.timestamp)
if args.progress:
archive.stats.show_progress(final=True)
args.stats |= args.json
if args.stats:
if args.json:
json_print(basic_json_data(archive.manifest, cache=archive.cache, extra={
'archive': archive,
}))
else:
log_multi(DASHES,
str(archive),
DASHES,
STATS_HEADER,
str(archive.stats),
str(archive.cache),
DASHES, logger=logging.getLogger('borg.output.stats'))
@with_repository(manifest=False, exclusive=True) @with_repository(manifest=False, exclusive=True)
def do_with_lock(self, args, repository): def do_with_lock(self, args, repository):
"""run a user specified command with the repository lock held""" """run a user specified command with the repository lock held"""
@ -4612,6 +4732,83 @@ class Archiver:
subparser.add_argument('args', metavar='ARGS', nargs=argparse.REMAINDER, subparser.add_argument('args', metavar='ARGS', nargs=argparse.REMAINDER,
help='command arguments') help='command arguments')
create_from_tar_epilog = process_epilog("""
This command creates a backup archive from a tarball.
When giving '-' as path, Borg will read a tar stream from standard input.
By default (--tar-filter=auto) Borg will detect whether the file is compressed
based on it's file extension and pipe the file through an appropriate filter:
- .tar.gz: gunzip
- .tar.bz2: bunzip2
- .tar.xz: unxz
Alternatively a --tar-filter program may be explicitly specified. It should
read compressed data from stdin and output an uncompressed tar stream on
stdout.
Most documentation of borg create applies. Note that this command does not
support excluding files.
create-from-tar reads POSIX.1-1988 (ustar), POSIX.1-2001 (pax), GNU tar,
UNIX V7 tar and SunOS tar with extended attributes.
""")
subparser = subparsers.add_parser('create-from-tar', parents=[common_parser], add_help=False,
description=self.do_create_from_tar.__doc__,
epilog=create_from_tar_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter,
help=self.do_create_from_tar.__doc__)
subparser.set_defaults(func=self.do_create_from_tar)
subparser.add_argument('--tar-filter', dest='tar_filter', default='auto',
help='filter program to pipe data through')
subparser.add_argument('-s', '--stats', dest='stats',
action='store_true', default=False,
help='print statistics for the created archive')
subparser.add_argument('-p', '--progress', dest='progress',
action='store_true', default=False,
help='show progress display while creating the archive, showing Original, '
'Compressed and Deduplicated sizes, followed by the Number of files seen '
'and the path being processed, default: %(default)s')
subparser.add_argument('--list', dest='output_list',
action='store_true', default=False,
help='output verbose list of items (files, dirs, ...)')
subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS',
help='only display items with the given status characters')
subparser.add_argument('--json', action='store_true',
help='output stats as JSON (implies --stats)')
archive_group = subparser.add_argument_group('Archive options')
archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default='',
help='add a comment text to the archive')
archive_group.add_argument('--timestamp', dest='timestamp',
type=timestamp, default=None,
metavar='TIMESTAMP',
help='manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). '
'alternatively, give a reference file/directory.')
archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
type=int, default=1800, metavar='SECONDS',
help='write checkpoint every SECONDS seconds (Default: 1800)')
archive_group.add_argument('--chunker-params', dest='chunker_params',
type=ChunkerParams, default=CHUNKER_PARAMS,
metavar='PARAMS',
help='specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, '
'HASH_MASK_BITS, HASH_WINDOW_SIZE). default: %d,%d,%d,%d' % CHUNKER_PARAMS)
archive_group.add_argument('-C', '--compression', dest='compression',
type=CompressionSpec, default=dict(name='lz4'), metavar='COMPRESSION',
help='select compression algorithm, see the output of the '
'"borg help compression" command for details.')
archive_group.add_argument('--compression-from', dest='compression_files',
type=argparse.FileType('r'), action='append',
metavar='COMPRESSIONCONFIG',
help='read compression patterns from COMPRESSIONCONFIG, see the output of the '
'"borg help compression" command for details.')
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='name of archive to create (must be also a valid directory name)')
subparser.add_argument('tarfile', metavar='TARFILE',
help='paths to recreate; patterns are supported')
return parser return parser
def get_args(self, argv, cmd): def get_args(self, argv, cmd):