From e07ae7631c8153e385c210357935812051eecde3 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 23 Aug 2016 22:21:29 +0200 Subject: [PATCH 1/5] Add more safety checks for Unpacker --- src/restic/pack/pack.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/restic/pack/pack.go b/src/restic/pack/pack.go index 78159dfa3..04b752123 100644 --- a/src/restic/pack/pack.go +++ b/src/restic/pack/pack.go @@ -235,7 +235,10 @@ type Unpacker struct { k *crypto.Key } -const preloadHeaderSize = 2048 +const ( + preloadHeaderSize = 2048 + maxHeaderSize = 16 * 1024 * 1024 +) // NewUnpacker returns a pointer to Unpacker which can be used to read // individual Blobs from a pack. @@ -264,6 +267,10 @@ func NewUnpacker(k *crypto.Key, ldr Loader) (*Unpacker, error) { length := int(binary.LittleEndian.Uint32(buf[p : p+bs])) buf = buf[:p] + if length > maxHeaderSize { + return nil, fmt.Errorf("header too large (%d bytes)", length) + } + // if the header is longer than the preloaded buffer, call the loader again. if length > len(buf) { buf = make([]byte, length) @@ -271,7 +278,10 @@ func NewUnpacker(k *crypto.Key, ldr Loader) (*Unpacker, error) { if err != nil { return nil, fmt.Errorf("Load at -%d failed: %v", len(buf), err) } - buf = buf[:n] + + if n != len(buf) { + return nil, fmt.Errorf("not enough header bytes read: wanted %v, got %v", len(buf), n) + } } buf = buf[len(buf)-length:] From 9f752b8306b95c57bd267bae47f2c8462551331b Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Thu, 25 Aug 2016 21:08:16 +0200 Subject: [PATCH 2/5] Rework function for listing packs --- src/restic/checker/checker.go | 4 ++-- src/restic/pack/pack.go | 23 +++-------------------- src/restic/pack/pack_test.go | 6 +++--- src/restic/repository/repack.go | 6 +++--- src/restic/repository/repository.go | 4 ++-- 5 files changed, 13 insertions(+), 30 deletions(-) diff --git a/src/restic/checker/checker.go b/src/restic/checker/checker.go index bcfa56a04..ff07b7539 100644 --- a/src/restic/checker/checker.go +++ b/src/restic/checker/checker.go @@ -676,13 +676,13 @@ func checkPack(r *repository.Repository, id backend.ID) error { return fmt.Errorf("Pack ID does not match, want %v, got %v", id.Str(), hash.Str()) } - unpacker, err := pack.NewUnpacker(r.Key(), pack.BufferLoader(buf)) + blobs, err := pack.List(r.Key(), pack.BufferLoader(buf)) if err != nil { return err } var errs []error - for i, blob := range unpacker.Entries { + for i, blob := range blobs { debug.Log("Checker.checkPack", " check blob %d: %v", i, blob.ID.Str()) plainBuf := make([]byte, blob.Length) diff --git a/src/restic/pack/pack.go b/src/restic/pack/pack.go index 04b752123..e60c14f76 100644 --- a/src/restic/pack/pack.go +++ b/src/restic/pack/pack.go @@ -228,22 +228,13 @@ func (p *Packer) String() string { return fmt.Sprintf("", len(p.blobs), p.bytes) } -// Unpacker is used to read individual blobs from a pack. -type Unpacker struct { - rd io.ReadSeeker - Entries []Blob - k *crypto.Key -} - const ( preloadHeaderSize = 2048 maxHeaderSize = 16 * 1024 * 1024 ) -// NewUnpacker returns a pointer to Unpacker which can be used to read -// individual Blobs from a pack. -func NewUnpacker(k *crypto.Key, ldr Loader) (*Unpacker, error) { - var err error +// List returns the list of entries found in a pack file. +func List(k *crypto.Key, ldr Loader) (entries []Blob, err error) { // read the last 2048 byte, this will mostly be enough for the header, so // we do not need another round trip. @@ -294,8 +285,6 @@ func NewUnpacker(k *crypto.Key, ldr Loader) (*Unpacker, error) { rd := bytes.NewReader(hdr) - var entries []Blob - pos := uint(0) for { e := headerEntry{} @@ -328,11 +317,5 @@ func NewUnpacker(k *crypto.Key, ldr Loader) (*Unpacker, error) { pos += uint(e.Length) } - up := &Unpacker{ - rd: rd, - k: k, - Entries: entries, - } - - return up, nil + return entries, nil } diff --git a/src/restic/pack/pack_test.go b/src/restic/pack/pack_test.go index e13c9884a..48870b9e6 100644 --- a/src/restic/pack/pack_test.go +++ b/src/restic/pack/pack_test.go @@ -63,13 +63,13 @@ func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, ldr pack.Loader, packS Equals(t, uint(written), packSize) // read and parse it again - np, err := pack.NewUnpacker(k, ldr) + entries, err := pack.List(k, ldr) OK(t, err) - Equals(t, len(np.Entries), len(bufs)) + Equals(t, len(entries), len(bufs)) var buf []byte for i, b := range bufs { - e := np.Entries[i] + e := entries[i] Equals(t, b.id, e.ID) if len(buf) < int(e.Length) { diff --git a/src/restic/repository/repack.go b/src/restic/repository/repack.go index 0498164a4..a72b4e351 100644 --- a/src/restic/repository/repack.go +++ b/src/restic/repository/repack.go @@ -32,14 +32,14 @@ func Repack(repo *Repository, packs backend.IDSet, keepBlobs pack.BlobSet) (err debug.Log("Repack", "pack %v loaded (%d bytes)", packID.Str(), len(buf)) - unpck, err := pack.NewUnpacker(repo.Key(), pack.BufferLoader(buf)) + blobs, err := pack.List(repo.Key(), pack.BufferLoader(buf)) if err != nil { return err } - debug.Log("Repack", "processing pack %v, blobs: %v", packID.Str(), len(unpck.Entries)) + debug.Log("Repack", "processing pack %v, blobs: %v", packID.Str(), len(blobs)) var plaintext []byte - for _, entry := range unpck.Entries { + for _, entry := range blobs { h := pack.Handle{ID: entry.ID, Type: entry.Type} if !keepBlobs.Has(h) { continue diff --git a/src/restic/repository/repository.go b/src/restic/repository/repository.go index 9c8aea25b..9ef948793 100644 --- a/src/restic/repository/repository.go +++ b/src/restic/repository/repository.go @@ -556,12 +556,12 @@ func (r *Repository) ListPack(id backend.ID) ([]pack.Blob, int64, error) { ldr := pack.BackendLoader{Backend: r.Backend(), Handle: h} - unpacker, err := pack.NewUnpacker(r.Key(), ldr) + blobs, err := pack.List(r.Key(), ldr) if err != nil { return nil, 0, err } - return unpacker.Entries, blobInfo.Size, nil + return blobs, blobInfo.Size, nil } // Delete calls backend.Delete() if implemented, and returns an error From 3fd1e4a992a1952976846c55ccb9b721f8d059be Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Thu, 25 Aug 2016 21:49:00 +0200 Subject: [PATCH 3/5] Add backend.ReaderAt --- src/restic/backend/readerat.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 src/restic/backend/readerat.go diff --git a/src/restic/backend/readerat.go b/src/restic/backend/readerat.go new file mode 100644 index 000000000..027b34456 --- /dev/null +++ b/src/restic/backend/readerat.go @@ -0,0 +1,19 @@ +package backend + +import ( + "io" +) + +type backendReaderAt struct { + be Backend + h Handle +} + +func (brd backendReaderAt) ReadAt(p []byte, off int64) (n int, err error) { + return brd.be.Load(brd.h, p, off) +} + +// ReaderAt returns an io.ReaderAt for a file in the backend. +func ReaderAt(be Backend, h Handle) io.ReaderAt { + return backendReaderAt{be: be, h: h} +} From de88fb20225e14e2a34ba48997b7bab41341f50e Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Thu, 25 Aug 2016 21:51:07 +0200 Subject: [PATCH 4/5] Simplify pack.List --- src/cmds/restic/cmd_dump.go | 10 ++- src/restic/checker/checker.go | 3 +- src/restic/pack/loader.go | 43 ------------ src/restic/pack/pack.go | 100 +++++++++++++++------------- src/restic/pack/pack_test.go | 14 ++-- src/restic/repository/repack.go | 3 +- src/restic/repository/repository.go | 4 +- 7 files changed, 71 insertions(+), 106 deletions(-) delete mode 100644 src/restic/pack/loader.go diff --git a/src/cmds/restic/cmd_dump.go b/src/cmds/restic/cmd_dump.go index 95efa2574..b7f815b3b 100644 --- a/src/cmds/restic/cmd_dump.go +++ b/src/cmds/restic/cmd_dump.go @@ -126,14 +126,18 @@ func printPacks(repo *repository.Repository, wr io.Writer) error { name := job.Data.(string) h := backend.Handle{Type: backend.Data, Name: name} - ldr := pack.BackendLoader{Backend: repo.Backend(), Handle: h} - unpacker, err := pack.NewUnpacker(repo.Key(), ldr) + blobInfo, err := repo.Backend().Stat(h) if err != nil { return nil, err } - return unpacker.Entries, nil + blobs, err := pack.List(repo.Key(), backend.ReaderAt(repo.Backend(), h), blobInfo.Size) + if err != nil { + return nil, err + } + + return blobs, nil } jobCh := make(chan worker.Job) diff --git a/src/restic/checker/checker.go b/src/restic/checker/checker.go index ff07b7539..0e29c4d50 100644 --- a/src/restic/checker/checker.go +++ b/src/restic/checker/checker.go @@ -1,6 +1,7 @@ package checker import ( + "bytes" "errors" "fmt" "sync" @@ -676,7 +677,7 @@ func checkPack(r *repository.Repository, id backend.ID) error { return fmt.Errorf("Pack ID does not match, want %v, got %v", id.Str(), hash.Str()) } - blobs, err := pack.List(r.Key(), pack.BufferLoader(buf)) + blobs, err := pack.List(r.Key(), bytes.NewReader(buf), int64(len(buf))) if err != nil { return err } diff --git a/src/restic/pack/loader.go b/src/restic/pack/loader.go deleted file mode 100644 index 2b4ada8a7..000000000 --- a/src/restic/pack/loader.go +++ /dev/null @@ -1,43 +0,0 @@ -package pack - -import ( - "errors" - "restic/backend" -) - -// Loader loads data from somewhere at a given offset. In contrast to -// io.ReaderAt, off may be negative, in which case it references a position -// relative to the end of the file (similar to Seek()). -type Loader interface { - Load(p []byte, off int64) (int, error) -} - -// BackendLoader creates a Loader from a Backend and a Handle. -type BackendLoader struct { - Backend backend.Backend - Handle backend.Handle -} - -// Load returns data at the given offset. -func (l BackendLoader) Load(p []byte, off int64) (int, error) { - return l.Backend.Load(l.Handle, p, off) -} - -// BufferLoader allows using a buffer as a Loader. -type BufferLoader []byte - -// Load returns data at the given offset. -func (b BufferLoader) Load(p []byte, off int64) (int, error) { - switch { - case off > int64(len(b)): - return 0, errors.New("offset is larger than data") - case off < -int64(len(b)): - off = 0 - case off < 0: - off = int64(len(b)) + off - } - - b = b[off:] - - return copy(p, b), nil -} diff --git a/src/restic/pack/pack.go b/src/restic/pack/pack.go index e60c14f76..4ffbf4d70 100644 --- a/src/restic/pack/pack.go +++ b/src/restic/pack/pack.go @@ -228,67 +228,73 @@ func (p *Packer) String() string { return fmt.Sprintf("", len(p.blobs), p.bytes) } -const ( - preloadHeaderSize = 2048 - maxHeaderSize = 16 * 1024 * 1024 -) +// readHeaderLength returns the header length read from the end of the file +// encoded in little endian. +func readHeaderLength(rd io.ReaderAt, size int64) (uint32, error) { + off := size - int64(binary.Size(uint32(0))) + + buf := make([]byte, binary.Size(uint32(0))) + n, err := rd.ReadAt(buf, off) + if err != nil { + return 0, err + } + + if n != len(buf) { + return 0, errors.New("not enough bytes read") + } + + return binary.LittleEndian.Uint32(buf), nil +} + +const maxHeaderSize = 16 * 1024 * 1024 + +// readHeader reads the header at the end of rd. size is the length of the +// whole data accessible in rd. +func readHeader(rd io.ReaderAt, size int64) ([]byte, error) { + hl, err := readHeaderLength(rd, size) + if err != nil { + return nil, err + } + + if int64(hl) > size-int64(binary.Size(hl)) { + return nil, errors.New("header is larger than file") + } + + if int64(hl) > maxHeaderSize { + return nil, errors.New("header is larger than maxHeaderSize") + } + + buf := make([]byte, int(hl)) + n, err := rd.ReadAt(buf, size-int64(hl)-int64(binary.Size(hl))) + if err != nil { + return nil, err + } + + if n != len(buf) { + return nil, errors.New("not enough bytes read") + } + + return buf, nil +} // List returns the list of entries found in a pack file. -func List(k *crypto.Key, ldr Loader) (entries []Blob, err error) { - - // read the last 2048 byte, this will mostly be enough for the header, so - // we do not need another round trip. - buf := make([]byte, preloadHeaderSize) - n, err := ldr.Load(buf, -int64(len(buf))) - - if err == io.ErrUnexpectedEOF { - err = nil - buf = buf[:n] - } - +func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []Blob, err error) { + buf, err := readHeader(rd, size) if err != nil { - return nil, fmt.Errorf("Load at -%d failed: %v", len(buf), err) - } - buf = buf[:n] - - bs := binary.Size(uint32(0)) - p := len(buf) - bs - - // read the length from the end of the buffer - length := int(binary.LittleEndian.Uint32(buf[p : p+bs])) - buf = buf[:p] - - if length > maxHeaderSize { - return nil, fmt.Errorf("header too large (%d bytes)", length) + return nil, err } - // if the header is longer than the preloaded buffer, call the loader again. - if length > len(buf) { - buf = make([]byte, length) - n, err := ldr.Load(buf, -int64(len(buf)+bs)) - if err != nil { - return nil, fmt.Errorf("Load at -%d failed: %v", len(buf), err) - } - - if n != len(buf) { - return nil, fmt.Errorf("not enough header bytes read: wanted %v, got %v", len(buf), n) - } - } - - buf = buf[len(buf)-length:] - - // read header hdr, err := crypto.Decrypt(k, buf, buf) if err != nil { return nil, err } - rd := bytes.NewReader(hdr) + hdrRd := bytes.NewReader(hdr) pos := uint(0) for { e := headerEntry{} - err = binary.Read(rd, binary.LittleEndian, &e) + err = binary.Read(hdrRd, binary.LittleEndian, &e) if err == io.EOF { break } diff --git a/src/restic/pack/pack_test.go b/src/restic/pack/pack_test.go index 48870b9e6..82b026e7e 100644 --- a/src/restic/pack/pack_test.go +++ b/src/restic/pack/pack_test.go @@ -47,7 +47,7 @@ func newPack(t testing.TB, k *crypto.Key, lengths []int) ([]Buf, []byte, uint) { return bufs, packData, p.Size() } -func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, ldr pack.Loader, packSize uint) { +func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, rd io.ReaderAt, packSize uint) { written := 0 for _, buf := range bufs { written += len(buf.data) @@ -63,7 +63,7 @@ func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, ldr pack.Loader, packS Equals(t, uint(written), packSize) // read and parse it again - entries, err := pack.List(k, ldr) + entries, err := pack.List(k, rd, int64(packSize)) OK(t, err) Equals(t, len(entries), len(bufs)) @@ -76,7 +76,7 @@ func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, ldr pack.Loader, packS buf = make([]byte, int(e.Length)) } buf = buf[:int(e.Length)] - n, err := ldr.Load(buf, int64(e.Offset)) + n, err := rd.ReadAt(buf, int64(e.Offset)) OK(t, err) buf = buf[:n] @@ -91,7 +91,7 @@ func TestCreatePack(t *testing.T) { bufs, packData, packSize := newPack(t, k, testLens) Equals(t, uint(len(packData)), packSize) - verifyBlobs(t, bufs, k, pack.BufferLoader(packData), packSize) + verifyBlobs(t, bufs, k, bytes.NewReader(packData), packSize) } var blobTypeJSON = []struct { @@ -128,8 +128,7 @@ func TestUnpackReadSeeker(t *testing.T) { handle := backend.Handle{Type: backend.Data, Name: id.String()} OK(t, b.Save(handle, packData)) - ldr := pack.BackendLoader{Backend: b, Handle: handle} - verifyBlobs(t, bufs, k, ldr, packSize) + verifyBlobs(t, bufs, k, backend.ReaderAt(b, handle), packSize) } func TestShortPack(t *testing.T) { @@ -142,6 +141,5 @@ func TestShortPack(t *testing.T) { handle := backend.Handle{Type: backend.Data, Name: id.String()} OK(t, b.Save(handle, packData)) - ldr := pack.BackendLoader{Backend: b, Handle: handle} - verifyBlobs(t, bufs, k, ldr, packSize) + verifyBlobs(t, bufs, k, backend.ReaderAt(b, handle), packSize) } diff --git a/src/restic/repository/repack.go b/src/restic/repository/repack.go index a72b4e351..bf8d7a3a7 100644 --- a/src/restic/repository/repack.go +++ b/src/restic/repository/repack.go @@ -1,6 +1,7 @@ package repository import ( + "bytes" "io" "restic/backend" "restic/crypto" @@ -32,7 +33,7 @@ func Repack(repo *Repository, packs backend.IDSet, keepBlobs pack.BlobSet) (err debug.Log("Repack", "pack %v loaded (%d bytes)", packID.Str(), len(buf)) - blobs, err := pack.List(repo.Key(), pack.BufferLoader(buf)) + blobs, err := pack.List(repo.Key(), bytes.NewReader(buf), int64(len(buf))) if err != nil { return err } diff --git a/src/restic/repository/repository.go b/src/restic/repository/repository.go index 9ef948793..8949b813f 100644 --- a/src/restic/repository/repository.go +++ b/src/restic/repository/repository.go @@ -554,9 +554,7 @@ func (r *Repository) ListPack(id backend.ID) ([]pack.Blob, int64, error) { return nil, 0, err } - ldr := pack.BackendLoader{Backend: r.Backend(), Handle: h} - - blobs, err := pack.List(r.Key(), ldr) + blobs, err := pack.List(r.Key(), backend.ReaderAt(r.Backend(), h), blobInfo.Size) if err != nil { return nil, 0, err } From b2a67d458c11c3f6a09a81e919f213028b7b83a3 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Thu, 25 Aug 2016 22:35:22 +0200 Subject: [PATCH 5/5] Remove unneeded packs without repacking --- src/cmds/restic/cmd_prune.go | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/cmds/restic/cmd_prune.go b/src/cmds/restic/cmd_prune.go index cdc83a17f..755cd1f7e 100644 --- a/src/cmds/restic/cmd_prune.go +++ b/src/cmds/restic/cmd_prune.go @@ -169,6 +169,7 @@ func (cmd CmdPrune) Execute(args []string) error { for h, blob := range idx.Blobs { if !usedBlobs.Has(h) { rewritePacks.Merge(blob.Packs) + continue } if blobCount[h] > 1 { @@ -176,13 +177,40 @@ func (cmd CmdPrune) Execute(args []string) error { } } - cmd.global.Verbosef("will rewrite %d packs\n", len(rewritePacks)) + // find packs that are unneeded + removePacks := backend.NewIDSet() +nextPack: + for packID, p := range idx.Packs { + for _, blob := range p.Entries { + h := pack.Handle{ID: blob.ID, Type: blob.Type} + if usedBlobs.Has(h) { + continue nextPack + } + } + + removePacks.Insert(packID) + + if !rewritePacks.Has(packID) { + return fmt.Errorf("pack %v is unneeded, but not contained in rewritePacks", packID.Str()) + } + + rewritePacks.Delete(packID) + } + + cmd.global.Verbosef("will delete %d packs and rewrite %d packs\n", len(removePacks), len(rewritePacks)) err = repository.Repack(repo, rewritePacks, usedBlobs) if err != nil { return err } + for packID := range removePacks { + err = repo.Backend().Remove(backend.Data, packID.String()) + if err != nil { + cmd.global.Warnf("unable to remove file %v from the repository\n", packID.Str()) + } + } + cmd.global.Verbosef("creating new index\n") for _ = range repo.List(backend.Data, done) {