Merge pull request #3666 from MichaelEischer/compression

Implement compression support
This commit is contained in:
MichaelEischer 2022-04-30 11:49:05 +02:00 committed by GitHub
commit ac9324aeaf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 899 additions and 284 deletions

View File

@ -0,0 +1,21 @@
Enhancement: Add compression support
We have added compression support to the restic repository format. To create a
repository using the new format run `init --repository-version 2`. Please note
that the repository cannot be read by restic versions prior to 0.14.0.
You can configure if data is compressed with the option `--compression`. It can
be set to `auto` (the default, which will compress very fast), `max` (which
will trade backup speed and CPU usage for better compression), or `off` (which
disables compression). Each setting is only applied for the single run of restic.
The new format version has not received much testing yet. Do not rely on it as
your only backup copy! Please run `check` at regular intervals to detect any
problems.
Upgrading in place is not yet supported. As a workaround, first create a new
repository using `init --repository-version 2 --copy-chunker-params --repo2 path/to/old/repo`.
Then use the `copy` command to copy all snapshots to the new repository.
https://github.com/restic/restic/issues/21
https://github.com/restic/restic/pull/3666

View File

@ -15,6 +15,7 @@ import (
"sort"
"time"
"github.com/klauspost/compress/zstd"
"github.com/spf13/cobra"
"golang.org/x/sync/errgroup"
@ -309,6 +310,10 @@ func decryptUnsigned(ctx context.Context, k *crypto.Key, buf []byte) []byte {
}
func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list []restic.Blob) error {
dec, err := zstd.NewReader(nil)
if err != nil {
panic(err)
}
be := repo.Backend()
h := restic.Handle{
Name: pack.String(),
@ -333,44 +338,47 @@ func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list
nonce, plaintext := buf[:key.NonceSize()], buf[key.NonceSize():]
plaintext, err = key.Open(plaintext[:0], nonce, plaintext, nil)
outputPrefix := ""
filePrefix := ""
if err != nil {
Warnf("error decrypting blob: %v\n", err)
var plain []byte
if tryRepair || repairByte {
plain = tryRepairWithBitflip(ctx, key, buf, repairByte)
plaintext = tryRepairWithBitflip(ctx, key, buf, repairByte)
}
var prefix string
if plain != nil {
id := restic.Hash(plain)
if !id.Equal(blob.ID) {
Printf(" repaired blob (length %v), hash is %v, ID does not match, wanted %v\n", len(plain), id, blob.ID)
prefix = "repaired-wrong-hash-"
} else {
Printf(" successfully repaired blob (length %v), hash is %v, ID matches\n", len(plain), id)
prefix = "repaired-"
}
if plaintext != nil {
outputPrefix = "repaired "
filePrefix = "repaired-"
} else {
plain = decryptUnsigned(ctx, key, buf)
prefix = "damaged-"
plaintext = decryptUnsigned(ctx, key, buf)
err = storePlainBlob(blob.ID, "damaged-", plaintext)
if err != nil {
return err
}
continue
}
err = storePlainBlob(blob.ID, prefix, plain)
}
if blob.IsCompressed() {
decompressed, err := dec.DecodeAll(plaintext, nil)
if err != nil {
return err
Printf(" failed to decompress blob %v\n", blob.ID)
}
if decompressed != nil {
plaintext = decompressed
}
continue
}
id := restic.Hash(plaintext)
var prefix string
if !id.Equal(blob.ID) {
Printf(" successfully decrypted blob (length %v), hash is %v, ID does not match, wanted %v\n", len(plaintext), id, blob.ID)
Printf(" successfully %vdecrypted blob (length %v), hash is %v, ID does not match, wanted %v\n", outputPrefix, len(plaintext), id, blob.ID)
prefix = "wrong-hash-"
} else {
Printf(" successfully decrypted blob (length %v), hash is %v, ID matches\n", len(plaintext), id)
Printf(" successfully %vdecrypted blob (length %v), hash is %v, ID matches\n", outputPrefix, len(plaintext), id)
prefix = "correct-"
}
if extractPack {
err = storePlainBlob(id, prefix, plaintext)
err = storePlainBlob(id, filePrefix+prefix, plaintext)
if err != nil {
return err
}
@ -476,27 +484,15 @@ func examinePack(ctx context.Context, repo restic.Repository, id restic.ID) erro
blobsLoaded := false
// examine all data the indexes have for the pack file
for _, idx := range repo.Index().(*repository.MasterIndex).All() {
idxIDs, err := idx.IDs()
if err != nil {
idxIDs = restic.IDs{}
}
blobs := idx.ListPack(id)
for b := range repo.Index().ListPacks(ctx, restic.NewIDSet(id)) {
blobs := b.Blobs
if len(blobs) == 0 {
continue
}
Printf(" index %v:\n", idxIDs)
checkPackSize(blobs, fi.Size)
// convert list of blobs to []restic.Blob
var list []restic.Blob
for _, b := range blobs {
list = append(list, b.Blob)
}
checkPackSize(list, fi.Size)
err = loadBlobs(ctx, repo, id, list)
err = loadBlobs(ctx, repo, id, blobs)
if err != nil {
Warnf("error: %v\n", err)
} else {
@ -532,14 +528,10 @@ func checkPackSize(blobs []restic.Blob, fileSize int64) {
if offset != uint64(pb.Offset) {
Printf(" hole in file, want offset %v, got %v\n", offset, pb.Offset)
}
offset += uint64(pb.Length)
offset = uint64(pb.Offset + pb.Length)
size += uint64(pb.Length)
}
// compute header size, per blob: 1 byte type, 4 byte length, 32 byte id
size += uint64(restic.CiphertextLength(len(blobs) * (1 + 4 + 32)))
// length in uint32 little endian
size += 4
size += uint64(pack.CalculateHeaderSize(blobs))
if uint64(fileSize) != size {
Printf(" file sizes do not match: computed %v from index, file size is %v\n", size, fileSize)

View File

@ -1,10 +1,13 @@
package main
import (
"strconv"
"github.com/restic/chunker"
"github.com/restic/restic/internal/backend/location"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
"github.com/spf13/cobra"
)
@ -30,6 +33,7 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
type InitOptions struct {
// Embedded options for the secondary repository; registered in init() with
// the purpose "to copy chunker parameters from".
secondaryRepoOptions
// CopyChunkerParameters is bound to --copy-chunker-params: copy the chunker
// parameters from the secondary repository.
CopyChunkerParameters bool
// RepositoryVersion is bound to --repository-version (default "stable").
// Accepted values are a numeric format version, "latest" or "stable";
// runInit treats an empty string like "latest".
RepositoryVersion string
}
var initOptions InitOptions
@ -40,9 +44,26 @@ func init() {
f := cmdInit.Flags()
initSecondaryRepoOptions(f, &initOptions.secondaryRepoOptions, "secondary", "to copy chunker parameters from")
f.BoolVar(&initOptions.CopyChunkerParameters, "copy-chunker-params", false, "copy chunker parameters from the secondary repository (useful with the copy command)")
f.StringVar(&initOptions.RepositoryVersion, "repository-version", "stable", "repository format version to use, allowed values are a format version, 'latest' and 'stable'")
}
func runInit(opts InitOptions, gopts GlobalOptions, args []string) error {
var version uint
if opts.RepositoryVersion == "latest" || opts.RepositoryVersion == "" {
version = restic.MaxRepoVersion
} else if opts.RepositoryVersion == "stable" {
version = restic.StableRepoVersion
} else {
v, err := strconv.ParseUint(opts.RepositoryVersion, 10, 32)
if err != nil {
return errors.Fatal("invalid repository version")
}
version = uint(v)
}
if version < restic.MinRepoVersion || version > restic.MaxRepoVersion {
return errors.Fatalf("only repository versions between %v and %v are allowed", restic.MinRepoVersion, restic.MaxRepoVersion)
}
chunkerPolynomial, err := maybeReadChunkerPolynomial(opts, gopts)
if err != nil {
return err
@ -65,9 +86,9 @@ func runInit(opts InitOptions, gopts GlobalOptions, args []string) error {
return errors.Fatalf("create repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err)
}
s := repository.New(be)
s := repository.New(be, repository.Options{Compression: gopts.Compression})
err = s.Init(gopts.ctx, gopts.password, chunkerPolynomial)
err = s.Init(gopts.ctx, version, gopts.password, chunkerPolynomial)
if err != nil {
return errors.Fatalf("create key in repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err)
}

View File

@ -64,6 +64,7 @@ type GlobalOptions struct {
InsecureTLS bool
TLSClientCert string
CleanupCache bool
Compression repository.CompressionMode
LimitUploadKb int
LimitDownloadKb int
@ -120,6 +121,7 @@ func init() {
f.StringVar(&globalOptions.TLSClientCert, "tls-client-cert", "", "path to a `file` containing PEM encoded TLS client certificate and private key")
f.BoolVar(&globalOptions.InsecureTLS, "insecure-tls", false, "skip TLS certificate verification when connecting to the repo (insecure)")
f.BoolVar(&globalOptions.CleanupCache, "cleanup-cache", false, "auto remove old cache directories")
f.Var(&globalOptions.Compression, "compression", "compression mode (only available for repo format version 2), one of (auto|off|max)")
f.IntVar(&globalOptions.LimitUploadKb, "limit-upload", 0, "limits uploads to a maximum rate in KiB/s. (default: unlimited)")
f.IntVar(&globalOptions.LimitDownloadKb, "limit-download", 0, "limits downloads to a maximum rate in KiB/s. (default: unlimited)")
f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)")
@ -435,7 +437,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) {
}
}
s := repository.New(be)
s := repository.New(be, repository.Options{Compression: opts.Compression})
passwordTriesLeft := 1
if stdinIsTerminal() && opts.password == "" {
@ -471,7 +473,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) {
id = id[:8]
}
if !opts.JSON {
Verbosef("repository %v opened successfully, password is correct\n", id)
Verbosef("repository %v opened (repo version %v) successfully, password is correct\n", id, s.Config().Version)
}
}

View File

@ -1470,7 +1470,7 @@ func TestRebuildIndexAlwaysFull(t *testing.T) {
defer func() {
repository.IndexFull = indexFull
}()
repository.IndexFull = func(*repository.Index) bool { return true }
repository.IndexFull = func(*repository.Index, bool) bool { return true }
testRebuildIndex(t, nil)
}

View File

@ -35,6 +35,13 @@ options exist:
* Configuring a program to be called when the password is needed via the
option ``--password-command`` or the environment variable
``RESTIC_PASSWORD_COMMAND``
* The ``init`` command has an option called ``--repository-version`` which can
be used to explicitly set the version for the new repository. By default,
the current stable version is used. Have a look at the `design documentation
<https://github.com/restic/restic/blob/master/doc/design.rst>`__ for
details.
Local
*****
@ -692,4 +699,3 @@ On MSYS2, you can install ``winpty`` as follows:
$ pacman -S winpty
$ winpty restic -r /srv/restic-repo init

View File

@ -62,28 +62,30 @@ like the following:
.. code:: json
{
"version": 1,
"version": 2,
"id": "5956a3f67a6230d4a92cefb29529f10196c7d92582ec305fd71ff6d331d6271b",
"chunker_polynomial": "25b468838dcb75"
}
After decryption, restic first checks that the version field contains a
version number that it understands, otherwise it aborts. At the moment,
the version is expected to be 1. The field ``id`` holds a unique ID
which consists of 32 random bytes, encoded in hexadecimal. This uniquely
identifies the repository, regardless if it is accessed via SFTP or
locally. The field ``chunker_polynomial`` contains a parameter that is
used for splitting large files into smaller chunks (see below).
version number that it understands, otherwise it aborts. At the moment, the
version is expected to be 1 or 2. The list of changes in the repository
format is contained in the section "Changes" below.
The field ``id`` holds a unique ID which consists of 32 random bytes, encoded
in hexadecimal. This uniquely identifies the repository, regardless if it is
accessed via a remote storage backend or locally. The field
``chunker_polynomial`` contains a parameter that is used for splitting large
files into smaller chunks (see below).
Repository Layout
-----------------
The ``local`` and ``sftp`` backends are implemented using files and
directories stored in a file system. The directory layout is the same
for both backend types.
for both backend types and is also used for all other remote backends.
The basic layout of a repository stored in a ``local`` or ``sftp``
backend is shown here:
The basic layout of a repository is shown here:
::
@ -109,8 +111,7 @@ backend is shown here:
│ └── 22a5af1bdc6e616f8a29579458c49627e01b32210d09adb288d1ecda7c5711ec
└── tmp
A local repository can be initialized with the ``restic init`` command,
e.g.:
A local repository can be initialized with the ``restic init`` command, e.g.:
.. code-block:: console
@ -186,40 +187,75 @@ After decryption, a Pack's header consists of the following elements:
::
Type_Blob1 || Length(EncryptedBlob1) || Hash(Plaintext_Blob1) ||
Type_Blob1 || Data_Blob1 ||
[...]
Type_BlobN || Length(EncryptedBlobN) || Hash(Plaintext_Blobn) ||
Type_BlobN || Data_BlobN ||
The Blob type field is a single byte. What follows it depends on the type. The
following Blob types are defined:
+-----------+----------------------+-------------------------------------------------------------------------------+
| Type | Meaning | Data |
+===========+======================+===============================================================================+
| 0b00 | data blob | ``Length(encrypted_blob) || Hash(plaintext_blob)`` |
+-----------+----------------------+-------------------------------------------------------------------------------+
| 0b01 | tree blob | ``Length(encrypted_blob) || Hash(plaintext_blob)`` |
+-----------+----------------------+-------------------------------------------------------------------------------+
| 0b10 | compressed data blob | ``Length(encrypted_blob) || Length(plaintext_blob) || Hash(plaintext_blob)`` |
+-----------+----------------------+-------------------------------------------------------------------------------+
| 0b11 | compressed tree blob | ``Length(encrypted_blob) || Length(plaintext_blob) || Hash(plaintext_blob)`` |
+-----------+----------------------+-------------------------------------------------------------------------------+
This is enough to calculate the offsets for all the Blobs in the Pack.
Length is the length of a Blob as a four byte integer in little-endian
format. The type field is a one byte field and labels the content of a
blob according to the following table:
The length fields are encoded as four byte integers in little-endian
format. In the Data column, ``Length(plaintext_blob)`` means the length
of the decrypted and uncompressed data a blob consists of.
+--------+-----------+
| Type | Meaning |
+========+===========+
| 0 | data |
+--------+-----------+
| 1 | tree |
+--------+-----------+
All other types are invalid, more types may be added in the future. The
compressed types are only valid for repository format version 2. Data and
tree blobs may be compressed with the zstandard compression algorithm.
All other types are invalid, more types may be added in the future.
In repository format version 1, data and tree blobs should be stored in
separate pack files. In version 2, they must be stored in separate files.
Compressed and non-compressed blobs of the same type may be mixed in a pack
file.
For reconstructing the index or parsing a pack without an index, first
the last four bytes must be read in order to find the length of the
header. Afterwards, the header can be read and parsed, which yields all
plaintext hashes, types, offsets and lengths of all included blobs.
Unpacked Data Format
====================
Individual files for the index, locks or snapshots are encrypted
and authenticated like Data and Tree Blobs, so the outer structure is
``IV || Ciphertext || MAC`` again. In repository format version 1 the
plaintext always consists of a JSON document which must either be an
object or an array.
Repository format version 2 adds support for compression. The plaintext
now starts with a header to indicate the encoding version to distinguish
it from plain JSON and to allow for further evolution of the storage format:
``encoding_version || data``
The ``encoding_version`` field is encoded as one byte.
For backwards compatibility the encoding versions '[' (0x5b) and '{' (0x7b)
are used to mark that the whole plaintext (including the encoding version
byte) should be treated as a JSON document.
For new data the encoding version is currently always ``2``. For that
version ``data`` contains a JSON document compressed using the zstandard
compression algorithm.
Indexing
========
Index files contain information about Data and Tree Blobs and the Packs
they are contained in and store this information in the repository. When
the local cached index is not accessible any more, the index files can
be downloaded and used to reconstruct the index. The files are encrypted
and authenticated like Data and Tree Blobs, so the outer structure is
``IV || Ciphertext || MAC`` again. The plaintext consists of a JSON
document like the following:
be downloaded and used to reconstruct the index. The file encoding is
described in the "Unpacked Data Format" section. The plaintext consists
of a JSON document like the following:
.. code:: json
@ -235,18 +271,22 @@ document like the following:
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 25
},{
"length": 38,
// no 'uncompressed_length' as blob is not compressed
},
{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 100
"length": 112,
"uncompressed_length": 511,
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data",
"offset": 150,
"length": 123
"length": 123,
"uncompressed_length": 234,
}
]
}, [...]
@ -255,7 +295,11 @@ document like the following:
This JSON document lists Packs and the blobs contained therein. In this
example, the Pack ``73d04e61`` contains two data Blobs and one Tree
blob, the plaintext hashes are listed afterwards.
blob, the plaintext hashes are listed afterwards. The ``length`` field
corresponds to ``Length(encrypted_blob)`` in the pack file header.
Field ``uncompressed_length`` is only present for compressed blobs and
therefore is never present in version 1. It is set to the value of
``Length(plaintext_blob)``.
The field ``supersedes`` lists the storage IDs of index files that have
been replaced with the current index file. This happens when index files
@ -350,8 +394,9 @@ Snapshots
A snapshot represents a directory with all files and sub-directories at
a given point in time. For each backup that is made, a new snapshot is
created. A snapshot is a JSON document that is stored in an encrypted
file below the directory ``snapshots`` in the repository. The filename
created. A snapshot is a JSON document that is stored in a file below
the directory ``snapshots`` in the repository. It uses the file encoding
described in the "Unpacked Data Format" section. The filename
is the storage ID. This string is unique and used within restic to
uniquely identify a snapshot.
@ -412,7 +457,7 @@ Blobs of data. The SHA-256 hashes of all Blobs are saved in an ordered
list which then represents the content of the file.
In order to relate these plaintext hashes to the actual location within
a Pack file , an index is used. If the index is not available, the
a Pack file, an index is used. If the index is not available, the
header of all data Blobs can be read.
Trees and Data
@ -517,8 +562,8 @@ time there must not be any other locks (exclusive and non-exclusive).
There may be multiple non-exclusive locks in parallel.
A lock is a file in the subdir ``locks`` whose filename is the storage
ID of the contents. It is encrypted and authenticated the same way as
other files in the repository and contains the following JSON structure:
ID of the contents. It is stored in the file encoding described in the
"Unpacked Data Format" section and contains the following JSON structure:
.. code:: json
@ -721,3 +766,11 @@ An adversary who has a leaked (decrypted) key for a repository could:
only be done using the ``copy`` command, which moves the data into a new
repository with a new master key, or by making a completely new repository
and new backup.
Changes
=======
Repository Version 2
--------------------
* Support compression for blobs (data/tree) and index / lock / snapshot files

2
go.mod
View File

@ -21,7 +21,7 @@ require (
github.com/hashicorp/golang-lru v0.5.4
github.com/json-iterator/go v1.1.12 // indirect
github.com/juju/ratelimit v1.0.1
github.com/klauspost/compress v1.15.1 // indirect
github.com/klauspost/compress v1.15.1
github.com/klauspost/cpuid/v2 v2.0.12 // indirect
github.com/kurin/blazer v0.5.4-0.20211030221322-ba894c124ac6
github.com/minio/md5-simd v1.1.2 // indirect

View File

@ -1894,7 +1894,7 @@ func TestArchiverContextCanceled(t *testing.T) {
defer removeTempdir()
// Ensure that the archiver itself reports the canceled context and not just the backend
repo, _ := repository.TestRepositoryWithBackend(t, &noCancelBackend{mem.New()})
repo, _ := repository.TestRepositoryWithBackend(t, &noCancelBackend{mem.New()}, 0)
back := restictest.Chdir(t, tempdir)
defer back()

View File

@ -350,7 +350,7 @@ func TestCheckerModifiedData(t *testing.T) {
t.Logf("archived as %v", sn.ID().Str())
beError := &errorBackend{Backend: repo.Backend()}
checkRepo := repository.New(beError)
checkRepo := repository.New(beError, repository.Options{})
test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, ""))
chkr := checker.New(checkRepo, false)

View File

@ -32,7 +32,7 @@ func NewPacker(k *crypto.Key, wr io.Writer) *Packer {
// Add saves the data read from rd as a new blob to the packer. Returned is the
// number of bytes written to the pack.
func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error) {
func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte, uncompressedLength int) (int, error) {
p.m.Lock()
defer p.m.Unlock()
@ -41,13 +41,15 @@ func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error)
n, err := p.wr.Write(data)
c.Length = uint(n)
c.Offset = p.bytes
c.UncompressedLength = uint(uncompressedLength)
p.bytes += uint(n)
p.blobs = append(p.blobs, c)
return n, errors.Wrap(err, "Write")
}
var entrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
var entrySize = uint(binary.Size(restic.BlobType(0)) + 2*headerLengthSize + len(restic.ID{}))
var plainEntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
// headerEntry describes the format of header entries. It serves only as
// documentation.
@ -57,6 +59,15 @@ type headerEntry struct {
ID restic.ID
}
// compressedHeaderEntry describes the format of header entries for compressed blobs.
// It serves only as documentation.
//
// The field order mirrors the order in which makeHeader appends the values
// for a blob with a non-zero UncompressedLength: type byte, length,
// uncompressed length, ID. Both lengths are written as four-byte
// little-endian integers.
type compressedHeaderEntry struct {
// Type is the blob type byte; makeHeader writes 2 for a compressed data
// blob and 3 for a compressed tree blob.
Type uint8
// Length is the length of the blob as stored in the pack file.
Length uint32
// UncompressedLength is the length recorded for the blob before
// compression; it is only present in entries for compressed blobs.
UncompressedLength uint32
// ID is the ID of the blob.
ID restic.ID
}
// Finalize writes the header for all added blobs and finalizes the pack.
// Returned are the number of bytes written, including the header.
func (p *Packer) Finalize() (uint, error) {
@ -70,7 +81,7 @@ func (p *Packer) Finalize() (uint, error) {
return 0, err
}
encryptedHeader := make([]byte, 0, len(header)+p.k.Overhead()+p.k.NonceSize())
encryptedHeader := make([]byte, 0, restic.CiphertextLength(len(header)))
nonce := crypto.NewRandomNonce()
encryptedHeader = append(encryptedHeader, nonce...)
encryptedHeader = p.k.Seal(encryptedHeader, nonce, header, nil)
@ -81,7 +92,7 @@ func (p *Packer) Finalize() (uint, error) {
return 0, errors.Wrap(err, "Write")
}
hdrBytes := restic.CiphertextLength(len(header))
hdrBytes := len(encryptedHeader)
if n != hdrBytes {
return 0, errors.New("wrong number of bytes written")
}
@ -104,11 +115,15 @@ func (p *Packer) makeHeader() ([]byte, error) {
buf := make([]byte, 0, len(p.blobs)*int(entrySize))
for _, b := range p.blobs {
switch b.Type {
case restic.DataBlob:
switch {
case b.Type == restic.DataBlob && b.UncompressedLength == 0:
buf = append(buf, 0)
case restic.TreeBlob:
case b.Type == restic.TreeBlob && b.UncompressedLength == 0:
buf = append(buf, 1)
case b.Type == restic.DataBlob && b.UncompressedLength != 0:
buf = append(buf, 2)
case b.Type == restic.TreeBlob && b.UncompressedLength != 0:
buf = append(buf, 3)
default:
return nil, errors.Errorf("invalid blob type %v", b.Type)
}
@ -116,6 +131,10 @@ func (p *Packer) makeHeader() ([]byte, error) {
var lenLE [4]byte
binary.LittleEndian.PutUint32(lenLE[:], uint32(b.Length))
buf = append(buf, lenLE[:]...)
if b.UncompressedLength != 0 {
binary.LittleEndian.PutUint32(lenLE[:], uint32(b.UncompressedLength))
buf = append(buf, lenLE[:]...)
}
buf = append(buf, b.ID[:]...)
}
@ -152,7 +171,7 @@ func (p *Packer) String() string {
var (
// we require at least one entry in the header, and one blob for a pack file
minFileSize = entrySize + crypto.Extension + uint(headerLengthSize)
minFileSize = plainEntrySize + crypto.Extension + uint(headerLengthSize)
)
const (
@ -167,16 +186,11 @@ const (
eagerEntries = 15
)
// readRecords reads up to max records from the underlying ReaderAt, returning
// the raw header, the total number of records in the header, and any error.
// If the header contains fewer than max entries, the header is truncated to
// readRecords reads up to bufsize bytes from the underlying ReaderAt, returning
// the raw header, the total number of bytes in the header, and any error.
// If the header contains fewer than bufsize bytes, the header is truncated to
// the appropriate size.
func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
var bufsize int
bufsize += max * int(entrySize)
bufsize += crypto.Extension
bufsize += headerLengthSize
func readRecords(rd io.ReaderAt, size int64, bufsize int) ([]byte, int, error) {
if bufsize > int(size) {
bufsize = int(size)
}
@ -197,8 +211,6 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
err = InvalidFileError{Message: "header length is zero"}
case hlen < crypto.Extension:
err = InvalidFileError{Message: "header length is too small"}
case (hlen-crypto.Extension)%uint32(entrySize) != 0:
err = InvalidFileError{Message: "header length is invalid"}
case int64(hlen) > size-int64(headerLengthSize):
err = InvalidFileError{Message: "header is larger than file"}
case int64(hlen) > MaxHeaderSize-int64(headerLengthSize):
@ -208,8 +220,8 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
return nil, 0, errors.Wrap(err, "readHeader")
}
total := (int(hlen) - crypto.Extension) / int(entrySize)
if total < max {
total := int(hlen + headerLengthSize)
if total < bufsize {
// truncate to the beginning of the pack header
b = b[len(b)-int(hlen):]
}
@ -230,11 +242,12 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
// eagerly download eagerEntries header entries as part of header-length request.
// only make second request if actual number of entries is greater than eagerEntries
b, c, err := readRecords(rd, size, eagerEntries)
eagerSize := eagerEntries*int(entrySize) + headerSize
b, c, err := readRecords(rd, size, eagerSize)
if err != nil {
return nil, err
}
if c <= eagerEntries {
if c <= eagerSize {
// eager read sufficed, return what we got
return b, nil
}
@ -262,7 +275,7 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
return nil, 0, err
}
if len(buf) < k.NonceSize()+k.Overhead() {
if len(buf) < restic.CiphertextLength(0) {
return nil, 0, errors.New("invalid header, too small")
}
@ -274,11 +287,12 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
return nil, 0, err
}
entries = make([]restic.Blob, 0, uint(len(buf))/entrySize)
// might over allocate a bit if all blobs have EntrySize but only by a few percent
entries = make([]restic.Blob, 0, uint(len(buf))/plainEntrySize)
pos := uint(0)
for len(buf) > 0 {
entry, err := parseHeaderEntry(buf)
entry, headerSize, err := parseHeaderEntry(buf)
if err != nil {
return nil, 0, err
}
@ -286,36 +300,60 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
entries = append(entries, entry)
pos += entry.Length
buf = buf[entrySize:]
buf = buf[headerSize:]
}
return entries, hdrSize, nil
}
func parseHeaderEntry(p []byte) (b restic.Blob, err error) {
if uint(len(p)) < entrySize {
func parseHeaderEntry(p []byte) (b restic.Blob, size uint, err error) {
l := uint(len(p))
size = plainEntrySize
if l < plainEntrySize {
err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
return b, err
return b, size, err
}
p = p[:entrySize]
tpe := p[0]
switch p[0] {
case 0:
switch tpe {
case 0, 2:
b.Type = restic.DataBlob
case 1:
case 1, 3:
b.Type = restic.TreeBlob
default:
return b, errors.Errorf("invalid type %d", p[0])
return b, size, errors.Errorf("invalid type %d", tpe)
}
b.Length = uint(binary.LittleEndian.Uint32(p[1:5]))
copy(b.ID[:], p[5:])
p = p[5:]
if tpe == 2 || tpe == 3 {
size = entrySize
if l < entrySize {
err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
return b, size, err
}
b.UncompressedLength = uint(binary.LittleEndian.Uint32(p[0:4]))
p = p[4:]
}
return b, nil
copy(b.ID[:], p[:])
return b, size, nil
}
// CalculateEntrySize returns the number of bytes the pack header entry for
// blob occupies. Entries for blobs that carry an uncompressed length (i.e.
// compressed blobs) use entrySize, which has room for the extra length
// field; all other entries use the shorter plainEntrySize.
func CalculateEntrySize(blob restic.Blob) int {
	size := plainEntrySize
	if blob.UncompressedLength != 0 {
		size = entrySize
	}
	return int(size)
}
func CalculateHeaderSize(blobs []restic.Blob) int {
return headerSize + len(blobs)*int(entrySize)
size := headerSize
for _, blob := range blobs {
size += CalculateEntrySize(blob)
}
return size
}
// Size returns the size of all packs computed by index information.
@ -333,7 +371,7 @@ func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.I
if !onlyHdr {
size += int64(blob.Length)
}
packSize[blob.PackID] = size + int64(entrySize)
packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob))
}
return packSize

View File

@ -13,7 +13,7 @@ import (
func TestParseHeaderEntry(t *testing.T) {
h := headerEntry{
Type: 0, // Blob.
Type: 0, // Blob
Length: 100,
}
for i := range h.ID {
@ -23,25 +23,58 @@ func TestParseHeaderEntry(t *testing.T) {
buf := new(bytes.Buffer)
_ = binary.Write(buf, binary.LittleEndian, &h)
b, err := parseHeaderEntry(buf.Bytes())
b, size, err := parseHeaderEntry(buf.Bytes())
rtest.OK(t, err)
rtest.Equals(t, restic.DataBlob, b.Type)
rtest.Equals(t, plainEntrySize, size)
t.Logf("%v %v", h.ID, b.ID)
rtest.Assert(t, bytes.Equal(h.ID[:], b.ID[:]), "id mismatch")
rtest.Equals(t, h.ID[:], b.ID[:])
rtest.Equals(t, uint(h.Length), b.Length)
rtest.Equals(t, uint(0), b.UncompressedLength)
c := compressedHeaderEntry{
Type: 2, // compressed Blob
Length: 100,
UncompressedLength: 200,
}
for i := range c.ID {
c.ID[i] = byte(i)
}
buf = new(bytes.Buffer)
_ = binary.Write(buf, binary.LittleEndian, &c)
b, size, err = parseHeaderEntry(buf.Bytes())
rtest.OK(t, err)
rtest.Equals(t, restic.DataBlob, b.Type)
rtest.Equals(t, entrySize, size)
t.Logf("%v %v", c.ID, b.ID)
rtest.Equals(t, c.ID[:], b.ID[:])
rtest.Equals(t, uint(c.Length), b.Length)
rtest.Equals(t, uint(c.UncompressedLength), b.UncompressedLength)
}
func TestParseHeaderEntryErrors(t *testing.T) {
h := headerEntry{
Type: 0, // Blob
Length: 100,
}
for i := range h.ID {
h.ID[i] = byte(i)
}
h.Type = 0xae
buf.Reset()
buf := new(bytes.Buffer)
_ = binary.Write(buf, binary.LittleEndian, &h)
b, err = parseHeaderEntry(buf.Bytes())
_, _, err := parseHeaderEntry(buf.Bytes())
rtest.Assert(t, err != nil, "no error for invalid type")
h.Type = 0
buf.Reset()
_ = binary.Write(buf, binary.LittleEndian, &h)
b, err = parseHeaderEntry(buf.Bytes()[:entrySize-1])
_, _, err = parseHeaderEntry(buf.Bytes()[:plainEntrySize-1])
rtest.Assert(t, err != nil, "no error for short input")
}
@ -97,7 +130,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
func TestReadRecords(t *testing.T) {
testReadRecords := func(dataSize, entryCount, totalRecords int) {
totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension)
off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension)
bufSize := entryCount*int(entrySize) + crypto.Extension
off := len(totalHeader) - bufSize
if off < 0 {
off = 0
}
@ -110,10 +144,10 @@ func TestReadRecords(t *testing.T) {
rd := bytes.NewReader(buf.Bytes())
header, count, err := readRecords(rd, int64(rd.Len()), entryCount)
header, count, err := readRecords(rd, int64(rd.Len()), bufSize+4)
rtest.OK(t, err)
rtest.Equals(t, len(totalHeader)+4, count)
rtest.Equals(t, expectedHeader, header)
rtest.Equals(t, totalRecords, count)
}
// basic

View File

@ -38,7 +38,7 @@ func newPack(t testing.TB, k *crypto.Key, lengths []int) ([]Buf, []byte, uint) {
var buf bytes.Buffer
p := pack.NewPacker(k, &buf)
for _, b := range bufs {
_, err := p.Add(restic.TreeBlob, b.id, b.data)
_, err := p.Add(restic.TreeBlob, b.id, b.data, 2*len(b.data))
rtest.OK(t, err)
}

View File

@ -75,12 +75,12 @@ const maxuint32 = 1<<32 - 1
func (idx *Index) store(packIndex int, blob restic.Blob) {
// assert that offset and length fit into uint32!
if blob.Offset > maxuint32 || blob.Length > maxuint32 {
if blob.Offset > maxuint32 || blob.Length > maxuint32 || blob.UncompressedLength > maxuint32 {
panic("offset or length does not fit in uint32. You have packs > 4GB!")
}
m := &idx.byType[blob.Type]
m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length))
m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length), uint32(blob.UncompressedLength))
}
// Final returns true iff the index is already written to the repository, it is
@ -93,12 +93,13 @@ func (idx *Index) Final() bool {
}
const (
indexMaxBlobs = 50000
indexMaxAge = 10 * time.Minute
indexMaxBlobs = 50000
indexMaxBlobsCompressed = 3 * indexMaxBlobs
indexMaxAge = 10 * time.Minute
)
// IndexFull returns true iff the index is "full enough" to be saved as a preliminary index.
var IndexFull = func(idx *Index) bool {
var IndexFull = func(idx *Index, compress bool) bool {
idx.m.Lock()
defer idx.m.Unlock()
@ -109,12 +110,18 @@ var IndexFull = func(idx *Index) bool {
blobs += idx.byType[typ].len()
}
age := time.Since(idx.created)
var maxBlobs uint
if compress {
maxBlobs = indexMaxBlobsCompressed
} else {
maxBlobs = indexMaxBlobs
}
switch {
case age >= indexMaxAge:
	// age used to be passed without a matching verb, so it only appeared
	// as a formatting artefact in the debug output; print it explicitly.
	debug.Log("index %p is old enough (age %v)", idx, age)
	return true
case blobs >= indexMaxBlobs:
case blobs >= maxBlobs:
debug.Log("index %p has %d blobs", idx, blobs)
return true
}
@ -169,8 +176,9 @@ func (idx *Index) toPackedBlob(e *indexEntry, t restic.BlobType) restic.PackedBl
BlobHandle: restic.BlobHandle{
ID: e.id,
Type: t},
Length: uint(e.length),
Offset: uint(e.offset),
Length: uint(e.length),
Offset: uint(e.offset),
UncompressedLength: uint(e.uncompressedLength),
},
PackID: idx.packs[e.packIndex],
}
@ -225,6 +233,9 @@ func (idx *Index) LookupSize(bh restic.BlobHandle) (plaintextLength uint, found
if e == nil {
return 0, false
}
if e.uncompressedLength != 0 {
return uint(e.uncompressedLength), true
}
return uint(restic.PlaintextLength(int(e.length))), true
}
@ -357,10 +368,11 @@ type packJSON struct {
}
type blobJSON struct {
ID restic.ID `json:"id"`
Type restic.BlobType `json:"type"`
Offset uint `json:"offset"`
Length uint `json:"length"`
ID restic.ID `json:"id"`
Type restic.BlobType `json:"type"`
Offset uint `json:"offset"`
Length uint `json:"length"`
UncompressedLength uint `json:"uncompressed_length,omitempty"`
}
// generatePackList returns a list of packs.
@ -391,10 +403,11 @@ func (idx *Index) generatePackList() ([]*packJSON, error) {
// add blob
p.Blobs = append(p.Blobs, blobJSON{
ID: e.id,
Type: restic.BlobType(typ),
Offset: uint(e.offset),
Length: uint(e.length),
ID: e.id,
Type: restic.BlobType(typ),
Offset: uint(e.offset),
Length: uint(e.length),
UncompressedLength: uint(e.uncompressedLength),
})
return true
@ -553,7 +566,7 @@ func (idx *Index) merge(idx2 *Index) error {
m2.foreach(func(e2 *indexEntry) bool {
if !hasIdenticalEntry(e2) {
// packIndex needs to be changed as idx2.pack was appended to idx.pack, see above
m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length)
m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length, e2.uncompressedLength)
}
return true
})
@ -601,8 +614,9 @@ func DecodeIndex(buf []byte, id restic.ID) (idx *Index, oldFormat bool, err erro
BlobHandle: restic.BlobHandle{
Type: blob.Type,
ID: blob.ID},
Offset: blob.Offset,
Length: blob.Length,
Offset: blob.Offset,
Length: blob.Length,
UncompressedLength: blob.UncompressedLength,
})
switch blob.Type {
@ -648,6 +662,7 @@ func decodeOldIndex(buf []byte) (idx *Index, err error) {
ID: blob.ID},
Offset: blob.Offset,
Length: blob.Length,
// no compressed length in the old index format
})
switch blob.Type {

View File

@ -23,11 +23,17 @@ func TestIndexSerialize(t *testing.T) {
pos := uint(0)
for j := 0; j < 20; j++ {
length := uint(i*100 + j)
uncompressedLength := uint(0)
if i >= 25 {
// test a mix of compressed and uncompressed packs
uncompressedLength = 2 * length
}
pb := restic.PackedBlob{
Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
UncompressedLength: uncompressedLength,
},
PackID: packID,
}
@ -164,7 +170,7 @@ func TestIndexSize(t *testing.T) {
}
// example index serialization from doc/Design.rst
var docExample = []byte(`
var docExampleV1 = []byte(`
{
"supersedes": [
"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
@ -177,12 +183,12 @@ var docExample = []byte(`
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 25
"length": 38
},{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 100
"length": 112
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
@ -196,6 +202,41 @@ var docExample = []byte(`
}
`)
// docExampleV2 is a serialized example index in which some blobs carry the
// optional "uncompressed_length" field: the first blob omits it, the other
// two set it. TestIndexUnserialize decodes it as the version 2 case.
var docExampleV2 = []byte(`
{
"supersedes": [
"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
],
"packs": [
{
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
"blobs": [
{
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 38
},
{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 112,
"uncompressed_length": 511
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data",
"offset": 150,
"length": 123,
"uncompressed_length": 234
}
]
}
]
}
`)
var docOldExample = []byte(`
[ {
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
@ -204,12 +245,12 @@ var docOldExample = []byte(`
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 25
"length": 38
},{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 100
"length": 112
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
@ -222,22 +263,23 @@ var docOldExample = []byte(`
`)
var exampleTests = []struct {
id, packID restic.ID
tpe restic.BlobType
offset, length uint
id, packID restic.ID
tpe restic.BlobType
offset, length uint
uncompressedLength uint
}{
{
restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.DataBlob, 0, 25,
restic.DataBlob, 0, 38, 0,
}, {
restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.TreeBlob, 38, 100,
restic.TreeBlob, 38, 112, 511,
}, {
restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.DataBlob, 150, 123,
restic.DataBlob, 150, 123, 234,
},
}
@ -254,41 +296,56 @@ var exampleLookupTest = struct {
}
func TestIndexUnserialize(t *testing.T) {
oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")}
for _, task := range []struct {
idxBytes []byte
version int
}{
{docExampleV1, 1},
{docExampleV2, 2},
} {
oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")}
idx, oldFormat, err := repository.DecodeIndex(docExample, restic.NewRandomID())
rtest.OK(t, err)
rtest.Assert(t, !oldFormat, "new index format recognized as old format")
idx, oldFormat, err := repository.DecodeIndex(task.idxBytes, restic.NewRandomID())
rtest.OK(t, err)
rtest.Assert(t, !oldFormat, "new index format recognized as old format")
for _, test := range exampleTests {
list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
for _, test := range exampleTests {
list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
}
blob := list[0]
t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob)
rtest.Equals(t, test.packID, blob.PackID)
rtest.Equals(t, test.tpe, blob.Type)
rtest.Equals(t, test.offset, blob.Offset)
rtest.Equals(t, test.length, blob.Length)
if task.version == 1 {
rtest.Equals(t, uint(0), blob.UncompressedLength)
} else if task.version == 2 {
rtest.Equals(t, test.uncompressedLength, blob.UncompressedLength)
} else {
t.Fatal("Invalid index version")
}
}
blob := list[0]
t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob)
rtest.Equals(t, oldIdx, idx.Supersedes())
rtest.Equals(t, test.packID, blob.PackID)
rtest.Equals(t, test.tpe, blob.Type)
rtest.Equals(t, test.offset, blob.Offset)
rtest.Equals(t, test.length, blob.Length)
}
rtest.Equals(t, oldIdx, idx.Supersedes())
blobs := idx.ListPack(exampleLookupTest.packID)
if len(blobs) != len(exampleLookupTest.blobs) {
t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs))
}
for _, blob := range blobs {
b, ok := exampleLookupTest.blobs[blob.ID]
if !ok {
t.Errorf("unexpected blob %v found", blob.ID.Str())
blobs := idx.ListPack(exampleLookupTest.packID)
if len(blobs) != len(exampleLookupTest.blobs) {
t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs))
}
if blob.Type != b {
t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type)
for _, blob := range blobs {
b, ok := exampleLookupTest.blobs[blob.ID]
if !ok {
t.Errorf("unexpected blob %v found", blob.ID.Str())
}
if blob.Type != b {
t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type)
}
}
}
}
@ -403,8 +460,9 @@ func createRandomIndex(rng *rand.Rand, packfiles int) (idx *repository.Index, lo
Type: restic.DataBlob,
ID: id,
},
Length: uint(size),
Offset: uint(offset),
Length: uint(size),
UncompressedLength: uint(2 * size),
Offset: uint(offset),
})
offset += size
@ -475,11 +533,17 @@ func TestIndexHas(t *testing.T) {
pos := uint(0)
for j := 0; j < 20; j++ {
length := uint(i*100 + j)
uncompressedLength := uint(0)
if i >= 25 {
// test a mix of compressed and uncompressed packs
uncompressedLength = 2 * length
}
pb := restic.PackedBlob{
Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
UncompressedLength: uncompressedLength,
},
PackID: packID,
}

View File

@ -32,7 +32,7 @@ const (
// add inserts an indexEntry for the given arguments into the map,
// using id as the key.
func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) {
func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) {
switch {
case m.numentries == 0: // Lazy initialization.
m.init()
@ -47,6 +47,7 @@ func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) {
e.packIndex = packIdx
e.offset = offset
e.length = length
e.uncompressedLength = uncompressedLength
m.buckets[h] = e
m.numentries++
@ -130,12 +131,12 @@ func (m *indexMap) len() uint { return m.numentries }
func (m *indexMap) newEntry() *indexEntry {
// Allocating in batches means that we get closer to optimal space usage,
// as Go's malloc will overallocate for structures of size 56 (indexEntry
// as Go's malloc will overallocate for structures of size 60 (indexEntry
// on amd64).
//
// 256*56 and 256*48 both have minimal malloc overhead among reasonable sizes.
// 128*60 and 128*60 both have low malloc overhead among reasonable sizes.
// See src/runtime/sizeclasses.go in the standard library.
const entryAllocBatch = 256
const entryAllocBatch = 128
if m.free == nil {
free := new([entryAllocBatch]indexEntry)
@ -152,9 +153,10 @@ func (m *indexMap) newEntry() *indexEntry {
}
type indexEntry struct {
id restic.ID
next *indexEntry
packIndex int // Position in containing Index's packs field.
offset uint32
length uint32
id restic.ID
next *indexEntry
packIndex int // Position in containing Index's packs field.
offset uint32
length uint32
uncompressedLength uint32
}

View File

@ -22,7 +22,7 @@ func TestIndexMapBasic(t *testing.T) {
r.Read(id[:])
rtest.Assert(t, m.get(id) == nil, "%v retrieved but not added", id)
m.add(id, 0, 0, 0)
m.add(id, 0, 0, 0, 0)
rtest.Assert(t, m.get(id) != nil, "%v added but not retrieved", id)
rtest.Equals(t, uint(i), m.len())
}
@ -41,7 +41,7 @@ func TestIndexMapForeach(t *testing.T) {
for i := 0; i < N; i++ {
var id restic.ID
id[0] = byte(i)
m.add(id, i, uint32(i), uint32(i))
m.add(id, i, uint32(i), uint32(i), uint32(i/2))
}
seen := make(map[int]struct{})
@ -51,6 +51,7 @@ func TestIndexMapForeach(t *testing.T) {
rtest.Equals(t, i, e.packIndex)
rtest.Equals(t, i, int(e.length))
rtest.Equals(t, i, int(e.offset))
rtest.Equals(t, i/2, int(e.uncompressedLength))
seen[i] = struct{}{}
return true
@ -85,13 +86,13 @@ func TestIndexMapForeachWithID(t *testing.T) {
// Test insertion and retrieval of duplicates.
for i := 0; i < ndups; i++ {
m.add(id, i, 0, 0)
m.add(id, i, 0, 0, 0)
}
for i := 0; i < 100; i++ {
var otherid restic.ID
r.Read(otherid[:])
m.add(otherid, -1, 0, 0)
m.add(otherid, -1, 0, 0, 0)
}
n = 0
@ -109,7 +110,7 @@ func TestIndexMapForeachWithID(t *testing.T) {
func BenchmarkIndexMapHash(b *testing.B) {
var m indexMap
m.add(restic.ID{}, 0, 0, 0) // Trigger lazy initialization.
m.add(restic.ID{}, 0, 0, 0, 0) // Trigger lazy initialization.
ids := make([]restic.ID, 128) // 4 KiB.
r := rand.New(rand.NewSource(time.Now().UnixNano()))

View File

@ -16,6 +16,7 @@ type MasterIndex struct {
idx []*Index
pendingBlobs restic.BlobSet
idxMutex sync.RWMutex
compress bool
}
// NewMasterIndex creates a new master index.
@ -28,6 +29,10 @@ func NewMasterIndex() *MasterIndex {
return &MasterIndex{idx: idx, pendingBlobs: restic.NewBlobSet()}
}
// markCompressed sets the compress flag of the master index. The flag is
// handed to IndexFull when FinalizeFullIndexes and Save decide whether an
// index is full.
func (mi *MasterIndex) markCompressed() {
	mi.compress = true
}
// Lookup queries all known Indexes for the ID and returns all matches.
func (mi *MasterIndex) Lookup(bh restic.BlobHandle) (pbs []restic.PackedBlob) {
mi.idxMutex.RLock()
@ -206,7 +211,7 @@ func (mi *MasterIndex) FinalizeFullIndexes() []*Index {
continue
}
if IndexFull(idx) {
if IndexFull(idx, mi.compress) {
debug.Log("index %p is full", idx)
idx.Finalize()
list = append(list, idx)
@ -334,7 +339,7 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBla
for pbs := range idx.EachByPack(ctx, packBlacklist) {
newIndex.StorePack(pbs.packID, pbs.blobs)
p.Add(1)
if IndexFull(newIndex) {
if IndexFull(newIndex, mi.compress) {
select {
case ch <- newIndex:
case <-ctx.Done():

View File

@ -30,9 +30,10 @@ func TestMasterIndex(t *testing.T) {
blob2 := restic.PackedBlob{
PackID: restic.NewRandomID(),
Blob: restic.Blob{
BlobHandle: bhInIdx2,
Length: uint(restic.CiphertextLength(100)),
Offset: 10,
BlobHandle: bhInIdx2,
Length: uint(restic.CiphertextLength(100)),
Offset: 10,
UncompressedLength: 200,
},
}
@ -48,9 +49,10 @@ func TestMasterIndex(t *testing.T) {
blob12b := restic.PackedBlob{
PackID: restic.NewRandomID(),
Blob: restic.Blob{
BlobHandle: bhInIdx12,
Length: uint(restic.CiphertextLength(123)),
Offset: 50,
BlobHandle: bhInIdx12,
Length: uint(restic.CiphertextLength(123)),
Offset: 50,
UncompressedLength: 80,
},
}
@ -86,7 +88,7 @@ func TestMasterIndex(t *testing.T) {
size, found = mIdx.LookupSize(bhInIdx2)
rtest.Equals(t, true, found)
rtest.Equals(t, uint(100), size)
rtest.Equals(t, uint(200), size)
// test idInIdx12
found = mIdx.Has(bhInIdx12)
@ -144,9 +146,10 @@ func TestMasterMergeFinalIndexes(t *testing.T) {
blob2 := restic.PackedBlob{
PackID: restic.NewRandomID(),
Blob: restic.Blob{
BlobHandle: bhInIdx2,
Length: 100,
Offset: 10,
BlobHandle: bhInIdx2,
Length: 100,
Offset: 10,
UncompressedLength: 200,
},
}
@ -335,8 +338,8 @@ var (
depth = 3
)
func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Repository, func()) {
repo, cleanup := repository.TestRepository(t)
func createFilledRepo(t testing.TB, snapshots int, dup float32, version uint) (restic.Repository, func()) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
for i := 0; i < 3; i++ {
restic.TestCreateSnapshot(t, repo, snapshotTime.Add(time.Duration(i)*time.Second), depth, dup)
@ -346,7 +349,11 @@ func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Reposito
}
func TestIndexSave(t *testing.T) {
repo, cleanup := createFilledRepo(t, 3, 0)
repository.TestAllVersions(t, testIndexSave)
}
func testIndexSave(t *testing.T, version uint) {
repo, cleanup := createFilledRepo(t, 3, 0, version)
defer cleanup()
err := repo.LoadIndex(context.TODO())

View File

@ -70,7 +70,7 @@ func fillPacks(t testing.TB, rnd *rand.Rand, be Saver, pm *packerManager, buf []
// Only change a few bytes so we know we're not benchmarking the RNG.
rnd.Read(buf[:min(l, 4)])
n, err := packer.Add(restic.DataBlob, id, buf)
n, err := packer.Add(restic.DataBlob, id, buf, 0)
if err != nil {
t.Fatal(err)
}

View File

@ -212,7 +212,11 @@ func reloadIndex(t *testing.T, repo restic.Repository) {
}
func TestRepack(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
repository.TestAllVersions(t, testRepack)
}
func testRepack(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
seed := time.Now().UnixNano()
@ -279,9 +283,13 @@ func TestRepack(t *testing.T) {
}
func TestRepackCopy(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
repository.TestAllVersions(t, testRepackCopy)
}
func testRepackCopy(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
dstRepo, dstCleanup := repository.TestRepository(t)
dstRepo, dstCleanup := repository.TestRepositoryWithVersion(t, version)
defer dstCleanup()
seed := time.Now().UnixNano()
@ -318,7 +326,11 @@ func TestRepackCopy(t *testing.T) {
}
func TestRepackWrongBlob(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
repository.TestAllVersions(t, testRepackWrongBlob)
}
func testRepackWrongBlob(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
seed := time.Now().UnixNano()

View File

@ -12,6 +12,7 @@ import (
"sync"
"github.com/cenkalti/backoff/v4"
"github.com/klauspost/compress/zstd"
"github.com/restic/chunker"
"github.com/restic/restic/internal/backend/dryrun"
"github.com/restic/restic/internal/cache"
@ -36,16 +37,71 @@ type Repository struct {
idx *MasterIndex
Cache *cache.Cache
opts Options
noAutoIndexUpdate bool
treePM *packerManager
dataPM *packerManager
allocEnc sync.Once
allocDec sync.Once
enc *zstd.Encoder
dec *zstd.Decoder
}
// Options holds the settings passed to New when creating a Repository.
type Options struct {
	// Compression is the compression mode consulted when blobs are saved
	// and when the zstd encoder level is chosen.
	Compression CompressionMode
}
// CompressionMode selects whether, and how aggressively, data is compressed.
// Its pointer type provides the Set, String and Type methods used for command
// line flag parsing.
type CompressionMode uint

// The available compression modes. The zero value is CompressionAuto.
const (
	CompressionAuto CompressionMode = iota
	CompressionOff
	CompressionMax
)

// Set parses s ("auto", "off" or "max") and stores the matching mode in c.
// Any other input yields an error and leaves c unchanged.
func (c *CompressionMode) Set(s string) error {
	known := map[string]CompressionMode{
		"auto": CompressionAuto,
		"off":  CompressionOff,
		"max":  CompressionMax,
	}

	mode, ok := known[s]
	if !ok {
		return fmt.Errorf("invalid compression mode %q, must be one of (auto|off|max)", s)
	}

	*c = mode
	return nil
}

// String returns the textual name of the mode, or "invalid" for a value that
// is not one of the declared constants.
func (c *CompressionMode) String() string {
	names := [...]string{
		CompressionAuto: "auto",
		CompressionOff:  "off",
		CompressionMax:  "max",
	}

	if mode := *c; mode < CompressionMode(len(names)) {
		return names[mode]
	}
	return "invalid"
}

// Type returns the name of the flag's value type.
func (c *CompressionMode) Type() string {
	return "mode"
}
// New returns a new repository with backend be.
func New(be restic.Backend) *Repository {
func New(be restic.Backend, opts Options) *Repository {
repo := &Repository{
be: be,
opts: opts,
idx: NewMasterIndex(),
dataPM: newPackerManager(be, nil),
treePM: newPackerManager(be, nil),
@ -60,6 +116,14 @@ func (r *Repository) DisableAutoIndexUpdate() {
r.noAutoIndexUpdate = true
}
// setConfig stores cfg as the repository configuration. For configs with
// version 2 or higher it also marks the master index as compressed.
func (r *Repository) setConfig(cfg restic.Config) {
	r.cfg = cfg

	if cfg.Version < 2 {
		return
	}
	r.idx.markCompressed()
}
// Config returns the repository configuration.
func (r *Repository) Config() restic.Config {
return r.cfg
@ -125,6 +189,9 @@ func (r *Repository) LoadUnpacked(ctx context.Context, buf []byte, t restic.File
if err != nil {
return nil, err
}
if t != restic.ConfigFile {
return r.decompressUnpacked(plaintext)
}
return plaintext, nil
}
@ -218,12 +285,23 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.
continue
}
if blob.IsCompressed() {
plaintext, err = r.getZstdDecoder().DecodeAll(plaintext, make([]byte, 0, blob.DataLength()))
if err != nil {
lastError = errors.Errorf("decompressing blob %v failed: %v", id, err)
continue
}
}
// check hash
if !restic.Hash(plaintext).Equal(id) {
lastError = errors.Errorf("blob %v returned invalid hash", id)
continue
}
if len(plaintext) > cap(buf) {
return plaintext, nil
}
// move decrypted data to the start of the buffer
copy(buf, plaintext)
return buf[:len(plaintext)], nil
@ -252,12 +330,70 @@ func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bo
return r.idx.LookupSize(restic.BlobHandle{ID: id, Type: tpe})
}
// getZstdEncoder returns the repository's zstd encoder. The encoder is created
// once, on first use, guarded by r.allocEnc; later calls return the same
// instance. It panics if the encoder cannot be constructed.
func (r *Repository) getZstdEncoder() *zstd.Encoder {
	r.allocEnc.Do(func() {
		// Only CompressionMax asks for the strongest level; every other
		// mode uses the default speed.
		level := zstd.SpeedDefault
		if r.opts.Compression == CompressionMax {
			level = zstd.SpeedBestCompression
		}

		encoder, err := zstd.NewWriter(nil,
			// Apply the level selected above.
			zstd.WithEncoderLevel(level),
			// No CRC: there are enough checks in place already, and the
			// output becomes four bytes shorter.
			zstd.WithEncoderCRC(false),
			// A 512 KiB window gives good lookbehind for usual blob sizes.
			zstd.WithWindowSize(512*1024),
		)
		if err != nil {
			panic(err)
		}

		r.enc = encoder
	})

	return r.enc
}
// getZstdDecoder returns the repository's zstd decoder. The decoder is created
// once, on first use, guarded by r.allocDec; later calls return the same
// instance. It panics if the decoder cannot be constructed.
func (r *Repository) getZstdDecoder() *zstd.Decoder {
	r.allocDec.Do(func() {
		decoder, err := zstd.NewReader(nil,
			// Concurrency 0: use all available cores.
			zstd.WithDecoderConcurrency(0),
			// Cap the decompressed size at 16 GiB, a deliberately very
			// high, conservative limit.
			zstd.WithDecoderMaxMemory(16<<30),
		)
		if err != nil {
			panic(err)
		}

		r.dec = decoder
	})

	return r.dec
}
// saveAndEncrypt encrypts data and stores it to the backend as type t. If data
// is small enough, it will be packed together with other small blobs.
// The caller must ensure that the id matches the data.
func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error {
debug.Log("save id %v (%v, %d bytes)", id, t, len(data))
uncompressedLength := 0
if r.cfg.Version > 1 {
// we have a repo v2, so compression is available. if the user opts to
// not compress, we won't compress any data, but everything else is
// compressed.
if r.opts.Compression != CompressionOff || t != restic.DataBlob {
uncompressedLength = len(data)
data = r.getZstdEncoder().EncodeAll(data, nil)
}
}
nonce := crypto.NewRandomNonce()
ciphertext := make([]byte, 0, restic.CiphertextLength(len(data)))
@ -284,7 +420,7 @@ func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data
}
// save ciphertext
_, err = packer.Add(t, id, ciphertext)
_, err = packer.Add(t, id, ciphertext, uncompressedLength)
if err != nil {
return err
}
@ -312,9 +448,50 @@ func (r *Repository) SaveJSONUnpacked(ctx context.Context, t restic.FileType, it
return r.SaveUnpacked(ctx, t, plaintext)
}
// compressUnpacked prepares p for storage as an unpacked file. For repository
// versions below 2 it returns p unchanged. Otherwise it returns a format
// version byte (2) followed by the zstd-compressed form of p. The returned
// error is always nil.
func (r *Repository) compressUnpacked(p []byte) ([]byte, error) {
	if r.cfg.Version < 2 {
		// no compression before repository version 2
		return p, nil
	}

	// the leading byte is the encoding version; the compressed data is
	// appended right after it
	header := []byte{2}
	return r.getZstdEncoder().EncodeAll(p, header), nil
}
// decompressUnpacked undoes compressUnpacked. The input is returned unchanged
// when the repository version is below 2, when p is empty, or when p starts
// with '[' or '{' (probably raw JSON). Otherwise the first byte must be the
// encoding version 2, and the remaining bytes are decompressed with zstd.
func (r *Repository) decompressUnpacked(p []byte) ([]byte, error) {
	switch {
	case r.cfg.Version < 2:
		// compression is only available starting from version 2
		return p, nil
	case len(p) == 0:
		// too short for a version header
		return p, nil
	case p[0] == '[' || p[0] == '{':
		// probably raw JSON
		return p, nil
	case p[0] != 2:
		// unknown encoding version
		return nil, errors.New("not supported encoding format")
	}

	compressed := p[1:]
	return r.getZstdDecoder().DecodeAll(compressed, nil)
}
// SaveUnpacked encrypts data and stores it in the backend. Returned is the
// storage hash.
func (r *Repository) SaveUnpacked(ctx context.Context, t restic.FileType, p []byte) (id restic.ID, err error) {
if t != restic.ConfigFile {
p, err = r.compressUnpacked(p)
if err != nil {
return restic.ID{}, err
}
}
ciphertext := restic.NewBlobBuffer(len(p))
ciphertext = ciphertext[:0]
nonce := crypto.NewRandomNonce()
@ -478,6 +655,17 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
return err
}
if r.cfg.Version < 2 {
// sanity check
ctx, cancel := context.WithCancel(ctx)
defer cancel()
for blob := range r.idx.Each(ctx) {
if blob.IsCompressed() {
return errors.Fatal("index uses feature not supported by repository version 1")
}
}
}
// remove index files from the cache which have been removed in the repo
return r.PrepareCache(validIndex)
}
@ -592,18 +780,28 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int
r.dataPM.key = key.master
r.treePM.key = key.master
r.keyName = key.Name()
r.cfg, err = restic.LoadConfig(ctx, r)
cfg, err := restic.LoadConfig(ctx, r)
if err == crypto.ErrUnauthenticated {
return errors.Fatalf("config or key %v is damaged: %v", key.Name(), err)
} else if err != nil {
return errors.Fatalf("config cannot be loaded: %v", err)
}
r.setConfig(cfg)
return nil
}
// Init creates a new master key with the supplied password, initializes and
// saves the repository config.
func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomial *chunker.Pol) error {
func (r *Repository) Init(ctx context.Context, version uint, password string, chunkerPolynomial *chunker.Pol) error {
if version > restic.MaxRepoVersion {
return fmt.Errorf("repo version %v too high", version)
}
if version < restic.MinRepoVersion {
return fmt.Errorf("repo version %v too low", version)
}
has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile})
if err != nil {
return err
@ -612,7 +810,7 @@ func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomia
return errors.New("repository master key and config already initialized")
}
cfg, err := restic.CreateConfig()
cfg, err := restic.CreateConfig(version)
if err != nil {
return err
}
@ -635,7 +833,7 @@ func (r *Repository) init(ctx context.Context, password string, cfg restic.Confi
r.dataPM.key = key.master
r.treePM.key = key.master
r.keyName = key.Name()
r.cfg = cfg
r.setConfig(cfg)
_, err = r.SaveJSONUnpacked(ctx, restic.ConfigFile, cfg)
return err
}
@ -768,9 +966,15 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
debug.Log("streaming pack %v (%d to %d bytes), blobs: %v", packID, dataStart, dataEnd, len(blobs))
dec, err := zstd.NewReader(nil)
if err != nil {
	// Panic with the construction error; panicking with dec would hide
	// the actual cause.
	panic(err)
}
defer dec.Close()
ctx, cancel := context.WithCancel(ctx)
// stream blobs in pack
err := beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error {
err = beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error {
// prevent callbacks after cancelation
if ctx.Err() != nil {
return ctx.Err()
@ -783,6 +987,7 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
bufRd := bufio.NewReaderSize(rd, bufferSize)
currentBlobEnd := dataStart
var buf []byte
var decode []byte
for _, entry := range blobs {
skipBytes := int(entry.Offset - currentBlobEnd)
if skipBytes < 0 {
@ -822,6 +1027,15 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
// decryption errors are likely permanent, give the caller a chance to skip them
nonce, ciphertext := buf[:key.NonceSize()], buf[key.NonceSize():]
plaintext, err := key.Open(ciphertext[:0], nonce, ciphertext, nil)
if err == nil && entry.IsCompressed() {
// DecodeAll will allocate a slice if it is not large enough since it
// knows the decompressed size (because we're using EncodeAll)
decode, err = dec.DecodeAll(plaintext, decode[:0])
plaintext = decode
if err != nil {
err = errors.Errorf("decompressing blob %v failed: %v", h, err)
}
}
if err == nil {
id := restic.Hash(plaintext)
if !id.Equal(entry.ID) {

View File

@ -15,6 +15,7 @@ import (
"time"
"github.com/google/go-cmp/cmp"
"github.com/klauspost/compress/zstd"
"github.com/restic/restic/internal/archiver"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/repository"
@ -28,7 +29,11 @@ var testSizes = []int{5, 23, 2<<18 + 23, 1 << 20}
var rnd = rand.New(rand.NewSource(time.Now().UnixNano()))
func TestSave(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
repository.TestAllVersions(t, testSave)
}
func testSave(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
for _, size := range testSizes {
@ -63,7 +68,11 @@ func TestSave(t *testing.T) {
}
func TestSaveFrom(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
repository.TestAllVersions(t, testSaveFrom)
}
func testSaveFrom(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
for _, size := range testSizes {
@ -96,7 +105,11 @@ func TestSaveFrom(t *testing.T) {
}
func BenchmarkSaveAndEncrypt(t *testing.B) {
repo, cleanup := repository.TestRepository(t)
repository.BenchmarkAllVersions(t, benchmarkSaveAndEncrypt)
}
func benchmarkSaveAndEncrypt(t *testing.B, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
size := 4 << 20 // 4MiB
@ -118,7 +131,11 @@ func BenchmarkSaveAndEncrypt(t *testing.B) {
}
func TestLoadTree(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
repository.TestAllVersions(t, testLoadTree)
}
func testLoadTree(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
if rtest.BenchArchiveDirectory == "" {
@ -134,7 +151,11 @@ func TestLoadTree(t *testing.T) {
}
func BenchmarkLoadTree(t *testing.B) {
repo, cleanup := repository.TestRepository(t)
repository.BenchmarkAllVersions(t, benchmarkLoadTree)
}
func benchmarkLoadTree(t *testing.B, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
if rtest.BenchArchiveDirectory == "" {
@ -154,7 +175,11 @@ func BenchmarkLoadTree(t *testing.B) {
}
func TestLoadBlob(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
repository.TestAllVersions(t, testLoadBlob)
}
func testLoadBlob(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
length := 1000000
@ -183,7 +208,11 @@ func TestLoadBlob(t *testing.T) {
}
func BenchmarkLoadBlob(b *testing.B) {
repo, cleanup := repository.TestRepository(b)
repository.BenchmarkAllVersions(b, benchmarkLoadBlob)
}
func benchmarkLoadBlob(b *testing.B, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(b, version)
defer cleanup()
length := 1000000
@ -219,7 +248,11 @@ func BenchmarkLoadBlob(b *testing.B) {
}
func BenchmarkLoadUnpacked(b *testing.B) {
repo, cleanup := repository.TestRepository(b)
repository.BenchmarkAllVersions(b, benchmarkLoadUnpacked)
}
func benchmarkLoadUnpacked(b *testing.B, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(b, version)
defer cleanup()
length := 1000000
@ -255,7 +288,11 @@ func BenchmarkLoadUnpacked(b *testing.B) {
}
func TestLoadJSONUnpacked(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
repository.TestAllVersions(t, testLoadJSONUnpacked)
}
func testLoadJSONUnpacked(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
if rtest.BenchArchiveDirectory == "" {
@ -313,9 +350,13 @@ func loadIndex(ctx context.Context, repo restic.Repository, id restic.ID) (*repo
}
func BenchmarkLoadIndex(b *testing.B) {
repository.BenchmarkAllVersions(b, benchmarkLoadIndex)
}
func benchmarkLoadIndex(b *testing.B, version uint) {
repository.TestUseLowSecurityKDFParameters(b)
repo, cleanup := repository.TestRepository(b)
repo, cleanup := repository.TestRepositoryWithVersion(b, version)
defer cleanup()
idx := repository.NewIndex()
@ -362,12 +403,16 @@ func saveRandomDataBlobs(t testing.TB, repo restic.Repository, num int, sizeMax
}
func TestRepositoryIncrementalIndex(t *testing.T) {
r, cleanup := repository.TestRepository(t)
repository.TestAllVersions(t, testRepositoryIncrementalIndex)
}
func testRepositoryIncrementalIndex(t *testing.T, version uint) {
r, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup()
repo := r.(*repository.Repository)
repository.IndexFull = func(*repository.Index) bool { return true }
repository.IndexFull = func(*repository.Index, bool) bool { return true }
// add 15 packs
for j := 0; j < 5; j++ {
@ -417,10 +462,31 @@ func TestRepositoryIncrementalIndex(t *testing.T) {
}
// buildPackfileWithoutHeader returns a manually built pack file without a header.
func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key) (blobs []restic.Blob, packfile []byte) {
func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key, compress bool) (blobs []restic.Blob, packfile []byte) {
opts := []zstd.EOption{
// Use the default compression level.
zstd.WithEncoderLevel(zstd.SpeedDefault),
// Disable CRC, we have enough checks in place, makes the
// compressed data four bytes shorter.
zstd.WithEncoderCRC(false),
// Set a window of 512kbyte, so we have good lookbehind for usual
// blob sizes.
zstd.WithWindowSize(512 * 1024),
}
enc, err := zstd.NewWriter(nil, opts...)
if err != nil {
panic(err)
}
var offset uint
for i, size := range blobSizes {
plaintext := test.Random(800+i, size)
id := restic.Hash(plaintext)
uncompressedLength := uint(0)
if compress {
uncompressedLength = uint(len(plaintext))
plaintext = enc.EncodeAll(plaintext, nil)
}
// we use a deterministic nonce here so the whole process is
// deterministic, last byte is the blob index
@ -438,11 +504,12 @@ func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key)
blobs = append(blobs, restic.Blob{
BlobHandle: restic.BlobHandle{
ID: restic.Hash(plaintext),
Type: restic.DataBlob,
ID: id,
},
Length: uint(ciphertextLength),
Offset: offset,
Length: uint(ciphertextLength),
UncompressedLength: uncompressedLength,
Offset: offset,
})
offset = uint(len(packfile))
@ -452,6 +519,10 @@ func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key)
}
func TestStreamPack(t *testing.T) {
repository.TestAllVersions(t, testStreamPack)
}
func testStreamPack(t *testing.T, version uint) {
// always use the same key for deterministic output
const jsonKey = `{"mac":{"k":"eQenuI8adktfzZMuC8rwdA==","r":"k8cfAly2qQSky48CQK7SBA=="},"encrypt":"MKO9gZnRiQFl8mDUurSDa9NMjiu9MUifUrODTHS05wo="}`
@ -476,7 +547,17 @@ func TestStreamPack(t *testing.T) {
18883,
}
packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key)
var compress bool
switch version {
case 1:
compress = false
case 2:
compress = true
default:
t.Fatal("test does not suport repository version", version)
}
packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key, compress)
load := func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
data := packfile

View File

@ -2,6 +2,7 @@ package repository
import (
"context"
"fmt"
"os"
"testing"
@ -41,7 +42,7 @@ const TestChunkerPol = chunker.Pol(0x3DA3358B4DC173)
// TestRepositoryWithBackend returns a repository initialized with a test
// password. If be is nil, an in-memory backend is used. A constant polynomial
// is used for the chunker and low-security test parameters.
func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Repository, cleanup func()) {
func TestRepositoryWithBackend(t testing.TB, be restic.Backend, version uint) (r restic.Repository, cleanup func()) {
t.Helper()
TestUseLowSecurityKDFParameters(t)
restic.TestDisableCheckPolynomial(t)
@ -51,9 +52,9 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi
be, beCleanup = TestBackend(t)
}
repo := New(be)
repo := New(be, Options{})
cfg := restic.TestCreateConfig(t, TestChunkerPol)
cfg := restic.TestCreateConfig(t, TestChunkerPol, version)
err := repo.init(context.TODO(), test.TestPassword, cfg)
if err != nil {
t.Fatalf("TestRepository(): initialize repo failed: %v", err)
@ -71,6 +72,11 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi
// a non-existing directory, a local backend is created there and this is used
// instead. The directory is not removed, but left there for inspection.
func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) {
t.Helper()
// Version 0 is not a real repository version: restic.TestCreateConfig
// replaces it with restic.StableRepoVersion.
return TestRepositoryWithVersion(t, 0)
}
func TestRepositoryWithVersion(t testing.TB, version uint) (r restic.Repository, cleanup func()) {
t.Helper()
dir := os.Getenv("RESTIC_TEST_REPO")
if dir != "" {
@ -80,7 +86,7 @@ func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) {
if err != nil {
t.Fatalf("error creating local backend at %v: %v", dir, err)
}
return TestRepositoryWithBackend(t, be)
return TestRepositoryWithBackend(t, be, version)
}
if err == nil {
@ -88,7 +94,7 @@ func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) {
}
}
return TestRepositoryWithBackend(t, nil)
return TestRepositoryWithBackend(t, nil, version)
}
// TestOpenLocal opens a local repository.
@ -98,7 +104,7 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) {
t.Fatal(err)
}
repo := New(be)
repo := New(be, Options{})
err = repo.SearchKey(context.TODO(), test.TestPassword, 10, "")
if err != nil {
t.Fatal(err)
@ -106,3 +112,23 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) {
return repo
}
// VersionedTest is a test function that is parameterized by a repository
// version.
type VersionedTest func(t *testing.T, version uint)

// TestAllVersions runs test as a subtest named "v<version>" once for every
// repository version from restic.MinRepoVersion to restic.MaxRepoVersion
// (both inclusive).
func TestAllVersions(t *testing.T, test VersionedTest) {
	for version := restic.MinRepoVersion; version <= restic.MaxRepoVersion; version++ {
		// Take a per-iteration copy: with pre-1.22 loop semantics the closure
		// would otherwise share the loop variable, and a subtest that calls
		// t.Parallel() only resumes after the loop has finished, at which
		// point the shared variable holds MaxRepoVersion+1.
		v := uint(version)
		t.Run(fmt.Sprintf("v%d", v), func(t *testing.T) {
			test(t, v)
		})
	}
}
// VersionedBenchmark is a benchmark function that is parameterized by a
// repository version.
type VersionedBenchmark func(b *testing.B, version uint)

// BenchmarkAllVersions runs bench as a sub-benchmark named "v<version>" once
// for every repository version from restic.MinRepoVersion to
// restic.MaxRepoVersion (both inclusive).
func BenchmarkAllVersions(b *testing.B, bench VersionedBenchmark) {
	for v := restic.MinRepoVersion; v <= restic.MaxRepoVersion; v++ {
		name := fmt.Sprintf("v%d", v)
		repoVersion := uint(v)
		b.Run(name, func(sub *testing.B) {
			bench(sub, repoVersion)
		})
	}
}

View File

@ -9,13 +9,25 @@ import (
// Blob is one part of a file or a tree.
type Blob struct {
BlobHandle
Length uint
Offset uint
Length uint
Offset uint
UncompressedLength uint
}
func (b Blob) String() string {
return fmt.Sprintf("<Blob (%v) %v, offset %v, length %v>",
b.Type, b.ID.Str(), b.Offset, b.Length)
return fmt.Sprintf("<Blob (%v) %v, offset %v, length %v, uncompressed length %v>",
b.Type, b.ID.Str(), b.Offset, b.Length, b.UncompressedLength)
}
// DataLength returns the length of the blob's data. When UncompressedLength
// is set (non-zero) that value is returned; otherwise the length is derived
// from Length via PlaintextLength.
func (b Blob) DataLength() uint {
	if b.UncompressedLength == 0 {
		return uint(PlaintextLength(int(b.Length)))
	}
	return b.UncompressedLength
}
// IsCompressed reports whether the blob has a non-zero UncompressedLength,
// which is how a compressed blob is marked.
func (b Blob) IsCompressed() bool {
	return b.UncompressedLength > 0
}
// PackedBlob is a blob stored within a file.

View File

@ -18,9 +18,12 @@ type Config struct {
ChunkerPolynomial chunker.Pol `json:"chunker_polynomial"`
}
// RepoVersion is the version that is written to the config when a repository
const MinRepoVersion = 1
const MaxRepoVersion = 2
// StableRepoVersion is the version that is written to the config when a repository
// is newly created with Init().
const RepoVersion = 1
const StableRepoVersion = 1
// JSONUnpackedLoader loads unpacked JSON.
type JSONUnpackedLoader interface {
@ -29,7 +32,7 @@ type JSONUnpackedLoader interface {
// CreateConfig creates a config file with a randomly selected polynomial and
// ID.
func CreateConfig() (Config, error) {
func CreateConfig(version uint) (Config, error) {
var (
err error
cfg Config
@ -41,18 +44,24 @@ func CreateConfig() (Config, error) {
}
cfg.ID = NewRandomID().String()
cfg.Version = RepoVersion
cfg.Version = version
debug.Log("New config: %#v", cfg)
return cfg, nil
}
// TestCreateConfig creates a config for use within tests.
func TestCreateConfig(t testing.TB, pol chunker.Pol) (cfg Config) {
func TestCreateConfig(t testing.TB, pol chunker.Pol, version uint) (cfg Config) {
cfg.ChunkerPolynomial = pol
cfg.ID = NewRandomID().String()
cfg.Version = RepoVersion
if version == 0 {
version = StableRepoVersion
}
if version < MinRepoVersion || version > MaxRepoVersion {
t.Fatalf("version %d is out of range", version)
}
cfg.Version = version
return cfg
}
@ -77,7 +86,7 @@ func LoadConfig(ctx context.Context, r JSONUnpackedLoader) (Config, error) {
return Config{}, err
}
if cfg.Version != RepoVersion {
if cfg.Version < MinRepoVersion || cfg.Version > MaxRepoVersion {
return Config{}, errors.Errorf("unsupported repository version %v", cfg.Version)
}

View File

@ -32,7 +32,7 @@ func TestConfig(t *testing.T) {
return restic.ID{}, nil
}
cfg1, err := restic.CreateConfig()
cfg1, err := restic.CreateConfig(restic.MaxRepoVersion)
rtest.OK(t, err)
_, err = saver(save).SaveJSONUnpacked(restic.ConfigFile, cfg1)

View File

@ -117,7 +117,7 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) {
if largeFile {
packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset})
fileOffset += int64(restic.PlaintextLength(int(blob.Length)))
fileOffset += int64(blob.DataLength())
}
pack, ok := packs[packID]
if !ok {
@ -195,7 +195,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
if packID.Equal(pack.id) {
addBlob(blob, fileOffset)
}
fileOffset += int64(restic.PlaintextLength(int(blob.Length)))
fileOffset += int64(blob.DataLength())
})
if err != nil {
// restoreFiles should have caught this error before