diff --git a/src/cmds/restic/cmd_dump.go b/src/cmds/restic/cmd_dump.go
index 95efa2574..b7f815b3b 100644
--- a/src/cmds/restic/cmd_dump.go
+++ b/src/cmds/restic/cmd_dump.go
@@ -126,14 +126,18 @@ func printPacks(repo *repository.Repository, wr io.Writer) error {
 		name := job.Data.(string)
 
 		h := backend.Handle{Type: backend.Data, Name: name}
-		ldr := pack.BackendLoader{Backend: repo.Backend(), Handle: h}
 
-		unpacker, err := pack.NewUnpacker(repo.Key(), ldr)
+		blobInfo, err := repo.Backend().Stat(h)
 		if err != nil {
 			return nil, err
 		}
 
-		return unpacker.Entries, nil
+		blobs, err := pack.List(repo.Key(), backend.ReaderAt(repo.Backend(), h), blobInfo.Size)
+		if err != nil {
+			return nil, err
+		}
+
+		return blobs, nil
 	}
 
 	jobCh := make(chan worker.Job)
diff --git a/src/cmds/restic/cmd_prune.go b/src/cmds/restic/cmd_prune.go
index cdc83a17f..755cd1f7e 100644
--- a/src/cmds/restic/cmd_prune.go
+++ b/src/cmds/restic/cmd_prune.go
@@ -169,6 +169,7 @@ func (cmd CmdPrune) Execute(args []string) error {
 	for h, blob := range idx.Blobs {
 		if !usedBlobs.Has(h) {
 			rewritePacks.Merge(blob.Packs)
+			continue
 		}
 
 		if blobCount[h] > 1 {
@@ -176,13 +177,40 @@ func (cmd CmdPrune) Execute(args []string) error {
 		}
 	}
 
-	cmd.global.Verbosef("will rewrite %d packs\n", len(rewritePacks))
+	// find packs that are unneeded
+	removePacks := backend.NewIDSet()
+nextPack:
+	for packID, p := range idx.Packs {
+		for _, blob := range p.Entries {
+			h := pack.Handle{ID: blob.ID, Type: blob.Type}
+			if usedBlobs.Has(h) {
+				continue nextPack
+			}
+		}
+
+		removePacks.Insert(packID)
+
+		if !rewritePacks.Has(packID) {
+			return fmt.Errorf("pack %v is unneeded, but not contained in rewritePacks", packID.Str())
+		}
+
+		rewritePacks.Delete(packID)
+	}
+
+	cmd.global.Verbosef("will delete %d packs and rewrite %d packs\n", len(removePacks), len(rewritePacks))
 
 	err = repository.Repack(repo, rewritePacks, usedBlobs)
 	if err != nil {
 		return err
 	}
 
+	for packID := range removePacks {
+		err = repo.Backend().Remove(backend.Data, packID.String())
+		if err != nil {
+			cmd.global.Warnf("unable to remove file %v from the repository\n", packID.Str())
+		}
+	}
+
 	cmd.global.Verbosef("creating new index\n")
 
 	for _ = range repo.List(backend.Data, done) {
diff --git a/src/restic/backend/readerat.go b/src/restic/backend/readerat.go
new file mode 100644
index 000000000..027b34456
--- /dev/null
+++ b/src/restic/backend/readerat.go
@@ -0,0 +1,19 @@
+package backend
+
+import (
+	"io"
+)
+
+type backendReaderAt struct {
+	be Backend
+	h  Handle
+}
+
+func (brd backendReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
+	return brd.be.Load(brd.h, p, off)
+}
+
+// ReaderAt returns an io.ReaderAt for a file in the backend.
+func ReaderAt(be Backend, h Handle) io.ReaderAt {
+	return backendReaderAt{be: be, h: h}
+}
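
The new backend.ReaderAt adapter is the glue for all the call sites in this patch: it wraps Backend.Load, which already takes an offset, in the standard io.ReaderAt interface that pack.List consumes. A minimal sketch of the resulting call pattern, modelled on the cmd_dump.go hunk above (listPackBlobs is a hypothetical helper, not part of this patch):

	// listPackBlobs stats the pack file first, because pack.List needs the
	// total file size: the header length is stored in the last four bytes.
	func listPackBlobs(repo *repository.Repository, name string) ([]pack.Blob, error) {
		h := backend.Handle{Type: backend.Data, Name: name}

		blobInfo, err := repo.Backend().Stat(h)
		if err != nil {
			return nil, err
		}

		return pack.List(repo.Key(), backend.ReaderAt(repo.Backend(), h), blobInfo.Size)
	}
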
diff --git a/src/restic/checker/checker.go b/src/restic/checker/checker.go
index bcfa56a04..0e29c4d50 100644
--- a/src/restic/checker/checker.go
+++ b/src/restic/checker/checker.go
@@ -1,6 +1,7 @@
 package checker
 
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"sync"
@@ -676,13 +677,13 @@ func checkPack(r *repository.Repository, id backend.ID) error {
 		return fmt.Errorf("Pack ID does not match, want %v, got %v", id.Str(), hash.Str())
 	}
 
-	unpacker, err := pack.NewUnpacker(r.Key(), pack.BufferLoader(buf))
+	blobs, err := pack.List(r.Key(), bytes.NewReader(buf), int64(len(buf)))
 	if err != nil {
 		return err
 	}
 
 	var errs []error
-	for i, blob := range unpacker.Entries {
+	for i, blob := range blobs {
 		debug.Log("Checker.checkPack", "  check blob %d: %v", i, blob.ID.Str())
 
 		plainBuf := make([]byte, blob.Length)
diff --git a/src/restic/pack/loader.go b/src/restic/pack/loader.go
deleted file mode 100644
index 2b4ada8a7..000000000
--- a/src/restic/pack/loader.go
+++ /dev/null
@@ -1,43 +0,0 @@
-package pack
-
-import (
-	"errors"
-	"restic/backend"
-)
-
-// Loader loads data from somewhere at a given offset. In contrast to
-// io.ReaderAt, off may be negative, in which case it references a position
-// relative to the end of the file (similar to Seek()).
-type Loader interface {
-	Load(p []byte, off int64) (int, error)
-}
-
-// BackendLoader creates a Loader from a Backend and a Handle.
-type BackendLoader struct {
-	Backend backend.Backend
-	Handle  backend.Handle
-}
-
-// Load returns data at the given offset.
-func (l BackendLoader) Load(p []byte, off int64) (int, error) {
-	return l.Backend.Load(l.Handle, p, off)
-}
-
-// BufferLoader allows using a buffer as a Loader.
-type BufferLoader []byte
-
-// Load returns data at the given offset.
-func (b BufferLoader) Load(p []byte, off int64) (int, error) {
-	switch {
-	case off > int64(len(b)):
-		return 0, errors.New("offset is larger than data")
-	case off < -int64(len(b)):
-		off = 0
-	case off < 0:
-		off = int64(len(b)) + off
-	}
-
-	b = b[off:]
-
-	return copy(p, b), nil
-}
diff --git a/src/restic/pack/pack.go b/src/restic/pack/pack.go
index 78159dfa3..4ffbf4d70 100644
--- a/src/restic/pack/pack.go
+++ b/src/restic/pack/pack.go
@@ -228,68 +228,73 @@ func (p *Packer) String() string {
 	return fmt.Sprintf("<Packer %d blobs, %d bytes>", len(p.blobs), p.bytes)
 }
 
-// Unpacker is used to read individual blobs from a pack.
-type Unpacker struct {
-	rd      io.ReadSeeker
-	Entries []Blob
-	k       *crypto.Key
+// readHeaderLength returns the header length read from the end of the file
+// encoded in little endian.
+func readHeaderLength(rd io.ReaderAt, size int64) (uint32, error) {
+	off := size - int64(binary.Size(uint32(0)))
+
+	buf := make([]byte, binary.Size(uint32(0)))
+	n, err := rd.ReadAt(buf, off)
+	if err != nil {
+		return 0, err
+	}
+
+	if n != len(buf) {
+		return 0, errors.New("not enough bytes read")
+	}
+
+	return binary.LittleEndian.Uint32(buf), nil
 }
 
-const preloadHeaderSize = 2048
-
-// NewUnpacker returns a pointer to Unpacker which can be used to read
-// individual Blobs from a pack.
-func NewUnpacker(k *crypto.Key, ldr Loader) (*Unpacker, error) {
-	var err error
-
-	// read the last 2048 byte, this will mostly be enough for the header, so
-	// we do not need another round trip.
-	buf := make([]byte, preloadHeaderSize)
-	n, err := ldr.Load(buf, -int64(len(buf)))
-
-	if err == io.ErrUnexpectedEOF {
-		err = nil
-		buf = buf[:n]
-	}
+const maxHeaderSize = 16 * 1024 * 1024
 
+// readHeader reads the header at the end of rd. size is the length of the
+// whole data accessible in rd.
+func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
+	hl, err := readHeaderLength(rd, size)
 	if err != nil {
-		return nil, fmt.Errorf("Load at -%d failed: %v", len(buf), err)
-	}
-	buf = buf[:n]
-
-	bs := binary.Size(uint32(0))
-	p := len(buf) - bs
-
-	// read the length from the end of the buffer
-	length := int(binary.LittleEndian.Uint32(buf[p : p+bs]))
-	buf = buf[:p]
-
-	// if the header is longer than the preloaded buffer, call the loader again.
-	if length > len(buf) {
-		buf = make([]byte, length)
-		n, err := ldr.Load(buf, -int64(len(buf)+bs))
-		if err != nil {
-			return nil, fmt.Errorf("Load at -%d failed: %v", len(buf), err)
-		}
-		buf = buf[:n]
+		return nil, err
 	}
 
-	buf = buf[len(buf)-length:]
+	if int64(hl) > size-int64(binary.Size(hl)) {
+		return nil, errors.New("header is larger than file")
+	}
+
+	if int64(hl) > maxHeaderSize {
+		return nil, errors.New("header is larger than maxHeaderSize")
+	}
+
+	buf := make([]byte, int(hl))
+	n, err := rd.ReadAt(buf, size-int64(hl)-int64(binary.Size(hl)))
+	if err != nil {
+		return nil, err
+	}
+
+	if n != len(buf) {
+		return nil, errors.New("not enough bytes read")
+	}
+
+	return buf, nil
+}
+
+// List returns the list of entries found in a pack file.
+func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []Blob, err error) {
+	buf, err := readHeader(rd, size)
+	if err != nil {
+		return nil, err
+	}
 
-	// read header
 	hdr, err := crypto.Decrypt(k, buf, buf)
 	if err != nil {
 		return nil, err
 	}
 
-	rd := bytes.NewReader(hdr)
-
-	var entries []Blob
+	hdrRd := bytes.NewReader(hdr)
 
 	pos := uint(0)
 	for {
 		e := headerEntry{}
-		err = binary.Read(rd, binary.LittleEndian, &e)
+		err = binary.Read(hdrRd, binary.LittleEndian, &e)
 		if err == io.EOF {
 			break
 		}
@@ -318,11 +323,5 @@ func NewUnpacker(k *crypto.Key, ldr Loader) (*Unpacker, error) {
 		pos += uint(e.Length)
 	}
 
-	up := &Unpacker{
-		rd:      rd,
-		k:       k,
-		Entries: entries,
-	}
-
-	return up, nil
+	return entries, nil
 }
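
The offset arithmetic in readHeaderLength and readHeader encodes the pack layout: blob data first, then the encrypted header, then a little-endian uint32 header length in the last four bytes of the file. A self-contained sketch of that layout with a toy, unencrypted header (the real header additionally goes through crypto.Decrypt):

	package main

	import (
		"bytes"
		"encoding/binary"
		"fmt"
	)

	func main() {
		// Assemble a toy pack: payload, header, then the header length.
		var pack bytes.Buffer
		pack.Write([]byte("blob data ..."))  // blobs
		pack.Write([]byte("header bytes"))   // header (unencrypted in this sketch)
		binary.Write(&pack, binary.LittleEndian, uint32(len("header bytes")))

		data := pack.Bytes()
		size := int64(len(data))

		// Recover the header the same way readHeaderLength/readHeader do:
		// the last 4 bytes hold hl, the header spans [size-4-hl, size-4).
		hl := binary.LittleEndian.Uint32(data[size-4:])
		fmt.Printf("%q\n", data[size-4-int64(hl):size-4]) // "header bytes"
	}
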
diff --git a/src/restic/pack/pack_test.go b/src/restic/pack/pack_test.go
index e13c9884a..82b026e7e 100644
--- a/src/restic/pack/pack_test.go
+++ b/src/restic/pack/pack_test.go
@@ -47,7 +47,7 @@ func newPack(t testing.TB, k *crypto.Key, lengths []int) ([]Buf, []byte, uint) {
 	return bufs, packData, p.Size()
 }
 
-func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, ldr pack.Loader, packSize uint) {
+func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, rd io.ReaderAt, packSize uint) {
 	written := 0
 	for _, buf := range bufs {
 		written += len(buf.data)
@@ -63,20 +63,20 @@ func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, ldr pack.Loader, packS
 	Equals(t, uint(written), packSize)
 
 	// read and parse it again
-	np, err := pack.NewUnpacker(k, ldr)
+	entries, err := pack.List(k, rd, int64(packSize))
 	OK(t, err)
-	Equals(t, len(np.Entries), len(bufs))
+	Equals(t, len(entries), len(bufs))
 
 	var buf []byte
 	for i, b := range bufs {
-		e := np.Entries[i]
+		e := entries[i]
 		Equals(t, b.id, e.ID)
 
 		if len(buf) < int(e.Length) {
 			buf = make([]byte, int(e.Length))
 		}
 		buf = buf[:int(e.Length)]
-		n, err := ldr.Load(buf, int64(e.Offset))
+		n, err := rd.ReadAt(buf, int64(e.Offset))
 		OK(t, err)
 		buf = buf[:n]
 
@@ -91,7 +91,7 @@ func TestCreatePack(t *testing.T) {
 	bufs, packData, packSize := newPack(t, k, testLens)
 	Equals(t, uint(len(packData)), packSize)
 
-	verifyBlobs(t, bufs, k, pack.BufferLoader(packData), packSize)
+	verifyBlobs(t, bufs, k, bytes.NewReader(packData), packSize)
 }
 
 var blobTypeJSON = []struct {
@@ -128,8 +128,7 @@ func TestUnpackReadSeeker(t *testing.T) {
 	handle := backend.Handle{Type: backend.Data, Name: id.String()}
 	OK(t, b.Save(handle, packData))
 
-	ldr := pack.BackendLoader{Backend: b, Handle: handle}
-	verifyBlobs(t, bufs, k, ldr, packSize)
+	verifyBlobs(t, bufs, k, backend.ReaderAt(b, handle), packSize)
 }
 
 func TestShortPack(t *testing.T) {
@@ -142,6 +141,5 @@ func TestShortPack(t *testing.T) {
 	handle := backend.Handle{Type: backend.Data, Name: id.String()}
 	OK(t, b.Save(handle, packData))
 
-	ldr := pack.BackendLoader{Backend: b, Handle: handle}
-	verifyBlobs(t, bufs, k, ldr, packSize)
+	verifyBlobs(t, bufs, k, backend.ReaderAt(b, handle), packSize)
 }
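
The reworked tests feed both reader implementations through the same verifyBlobs helper: a *bytes.Reader for a pack held in memory, and the backend adapter for a pack that was saved first. A sketch of the symmetry they rely on (readOneBlob is a hypothetical helper mirroring the loop body above):

	// Compile-time check: an in-memory pack already satisfies io.ReaderAt.
	var _ io.ReaderAt = (*bytes.Reader)(nil)

	// readOneBlob fetches a single blob by random access; no Seek and no
	// stateful reader, which is what made io.ReadSeeker awkward before.
	func readOneBlob(rd io.ReaderAt, e pack.Blob) ([]byte, error) {
		buf := make([]byte, int(e.Length))
		n, err := rd.ReadAt(buf, int64(e.Offset))
		if err != nil {
			return nil, err
		}
		return buf[:n], nil
	}
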
diff --git a/src/restic/repository/repack.go b/src/restic/repository/repack.go
index 0498164a4..bf8d7a3a7 100644
--- a/src/restic/repository/repack.go
+++ b/src/restic/repository/repack.go
@@ -1,6 +1,7 @@
 package repository
 
 import (
+	"bytes"
 	"io"
 	"restic/backend"
 	"restic/crypto"
@@ -32,14 +33,14 @@ func Repack(repo *Repository, packs backend.IDSet, keepBlobs pack.BlobSet) (err
 		debug.Log("Repack", "pack %v loaded (%d bytes)", packID.Str(), len(buf))
 
-		unpck, err := pack.NewUnpacker(repo.Key(), pack.BufferLoader(buf))
+		blobs, err := pack.List(repo.Key(), bytes.NewReader(buf), int64(len(buf)))
 		if err != nil {
 			return err
 		}
 
-		debug.Log("Repack", "processing pack %v, blobs: %v", packID.Str(), len(unpck.Entries))
+		debug.Log("Repack", "processing pack %v, blobs: %v", packID.Str(), len(blobs))
 
 		var plaintext []byte
-		for _, entry := range unpck.Entries {
+		for _, entry := range blobs {
 			h := pack.Handle{ID: entry.ID, Type: entry.Type}
 			if !keepBlobs.Has(h) {
 				continue
diff --git a/src/restic/repository/repository.go b/src/restic/repository/repository.go
index 9c8aea25b..8949b813f 100644
--- a/src/restic/repository/repository.go
+++ b/src/restic/repository/repository.go
@@ -554,14 +554,12 @@ func (r *Repository) ListPack(id backend.ID) ([]pack.Blob, int64, error) {
 		return nil, 0, err
 	}
 
-	ldr := pack.BackendLoader{Backend: r.Backend(), Handle: h}
-
-	unpacker, err := pack.NewUnpacker(r.Key(), ldr)
+	blobs, err := pack.List(r.Key(), backend.ReaderAt(r.Backend(), h), blobInfo.Size)
 	if err != nil {
 		return nil, 0, err
 	}
 
-	return unpacker.Entries, blobInfo.Size, nil
+	return blobs, blobInfo.Size, nil
 }
 
 // Delete calls backend.Delete() if implemented, and returns an error
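
Every call-site conversion in this patch follows the same shape, condensed here from the hunks above:

	// before: stateful Unpacker over a custom pack.Loader
	//   unpacker, err := pack.NewUnpacker(repo.Key(), pack.BufferLoader(buf))
	//   entries := unpacker.Entries
	//
	// after: a plain function over io.ReaderAt plus an explicit size
	//   blobs, err := pack.List(repo.Key(), bytes.NewReader(buf), int64(len(buf)))
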