From 8d0ba55ecd42b761846b3555958f0180d99e4a77 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Wed, 30 Sep 2020 15:48:39 +0200 Subject: [PATCH] backup: Fix possible deadlock of scanner goroutine When the backup is interrupted for some reason while the scanner is still active this could lead to a deadlock. Interruptions are triggered by canceling the context object used by both the backup progress UI and the scanner. It is possible that a context is canceled between the respective check in the scanner and it calling the `ReportTotal` method of the UI. The latter method sends a message to the UI goroutine. However, a canceled context will also stop that goroutine, which can cause the channel send operation to block indefinitely. This is resolved by adding a `closed` channel which is closed once the UI goroutine is stopped and serves as an escape hatch for reported UI updates. This change covers not just the ReportTotal method but all potentially affected methods of the progress UI implementation. --- changelog/unreleased/issue-2834 | 7 +++++ internal/ui/backup.go | 54 +++++++++++++++++++++----------- internal/ui/json/backup.go | 55 ++++++++++++++++++++++----------- 3 files changed, 80 insertions(+), 36 deletions(-) create mode 100644 changelog/unreleased/issue-2834 diff --git a/changelog/unreleased/issue-2834 b/changelog/unreleased/issue-2834 new file mode 100644 index 000000000..42d63cd31 --- /dev/null +++ b/changelog/unreleased/issue-2834 @@ -0,0 +1,7 @@ +Bugfix: Fix rare cases of backup command hanging forever + +We've fixed an issue with the backup progress reporting which could cause +restic to hang forever right before finishing a backup. + +https://github.com/restic/restic/issues/2834 +https://github.com/restic/restic/pull/2963 diff --git a/internal/ui/backup.go b/internal/ui/backup.go index e47f19f04..29a93573d 100644 --- a/internal/ui/backup.go +++ b/internal/ui/backup.go @@ -41,6 +41,7 @@ type Backup struct { errCh chan struct{} workerCh chan fileWorkerMessage finished chan struct{} + closed chan struct{} summary struct { sync.Mutex @@ -71,6 +72,7 @@ func NewBackup(term *termstatus.Terminal, verbosity uint) *Backup { errCh: make(chan struct{}), workerCh: make(chan fileWorkerMessage), finished: make(chan struct{}), + closed: make(chan struct{}), } } @@ -88,6 +90,7 @@ func (b *Backup) Run(ctx context.Context) error { t := time.NewTicker(time.Second) defer t.Stop() + defer close(b.closed) for { select { @@ -192,20 +195,27 @@ func (b *Backup) ScannerError(item string, fi os.FileInfo, err error) error { // Error is the error callback function for the archiver, it prints the error and returns nil. func (b *Backup) Error(item string, fi os.FileInfo, err error) error { b.E("error: %v\n", err) - b.errCh <- struct{}{} + select { + case b.errCh <- struct{}{}: + case <-b.closed: + } return nil } // StartFile is called when a file is being processed by a worker. func (b *Backup) StartFile(filename string) { - b.workerCh <- fileWorkerMessage{ - filename: filename, + select { + case b.workerCh <- fileWorkerMessage{filename: filename}: + case <-b.closed: } } // CompleteBlob is called for all saved blobs for files. func (b *Backup) CompleteBlob(filename string, bytes uint64) { - b.processedCh <- counter{Bytes: bytes} + select { + case b.processedCh <- counter{Bytes: bytes}: + case <-b.closed: + } } func formatPercent(numerator uint64, denominator uint64) string { @@ -270,22 +280,28 @@ func (b *Backup) CompleteItem(item string, previous, current *restic.Node, s arc if current == nil { // error occurred, tell the status display to remove the line - b.workerCh <- fileWorkerMessage{ - filename: item, - done: true, + select { + case b.workerCh <- fileWorkerMessage{filename: item, done: true}: + case <-b.closed: } return } switch current.Type { case "file": - b.processedCh <- counter{Files: 1} - b.workerCh <- fileWorkerMessage{ - filename: item, - done: true, + select { + case b.processedCh <- counter{Files: 1}: + case <-b.closed: + } + select { + case b.workerCh <- fileWorkerMessage{filename: item, done: true}: + case <-b.closed: } case "dir": - b.processedCh <- counter{Dirs: 1} + select { + case b.processedCh <- counter{Dirs: 1}: + case <-b.closed: + } } if current.Type == "dir" { @@ -310,10 +326,9 @@ func (b *Backup) CompleteItem(item string, previous, current *restic.Node, s arc } } else if current.Type == "file" { - - b.workerCh <- fileWorkerMessage{ - done: true, - filename: item, + select { + case b.workerCh <- fileWorkerMessage{done: true, filename: item}: + case <-b.closed: } if previous == nil { @@ -342,7 +357,7 @@ func (b *Backup) CompleteItem(item string, previous, current *restic.Node, s arc func (b *Backup) ReportTotal(item string, s archiver.ScanStats) { select { case b.totalCh <- counter{Files: s.Files, Dirs: s.Dirs, Bytes: s.Bytes}: - case <-b.finished: + case <-b.closed: } if item == "" { @@ -357,7 +372,10 @@ func (b *Backup) ReportTotal(item string, s archiver.ScanStats) { // Finish prints the finishing messages. func (b *Backup) Finish(snapshotID restic.ID) { - close(b.finished) + select { + case b.finished <- struct{}{}: + case <-b.closed: + } b.P("\n") b.P("Files: %5d new, %5d changed, %5d unmodified\n", b.summary.Files.New, b.summary.Files.Changed, b.summary.Files.Unchanged) diff --git a/internal/ui/json/backup.go b/internal/ui/json/backup.go index 9380422b9..d9be31b24 100644 --- a/internal/ui/json/backup.go +++ b/internal/ui/json/backup.go @@ -42,6 +42,7 @@ type Backup struct { errCh chan struct{} workerCh chan fileWorkerMessage finished chan struct{} + closed chan struct{} summary struct { sync.Mutex @@ -72,6 +73,7 @@ func NewBackup(term *termstatus.Terminal, verbosity uint) *Backup { errCh: make(chan struct{}), workerCh: make(chan fileWorkerMessage), finished: make(chan struct{}), + closed: make(chan struct{}), } } @@ -103,6 +105,7 @@ func (b *Backup) Run(ctx context.Context) error { t := time.NewTicker(time.Second) defer t.Stop() + defer close(b.closed) for { select { @@ -200,20 +203,27 @@ func (b *Backup) Error(item string, fi os.FileInfo, err error) error { During: "archival", Item: item, }) - b.errCh <- struct{}{} + select { + case b.errCh <- struct{}{}: + case <-b.closed: + } return nil } // StartFile is called when a file is being processed by a worker. func (b *Backup) StartFile(filename string) { - b.workerCh <- fileWorkerMessage{ - filename: filename, + select { + case b.workerCh <- fileWorkerMessage{filename: filename}: + case <-b.closed: } } // CompleteBlob is called for all saved blobs for files. func (b *Backup) CompleteBlob(filename string, bytes uint64) { - b.processedCh <- counter{Bytes: bytes} + select { + case b.processedCh <- counter{Bytes: bytes}: + case <-b.closed: + } } // CompleteItem is the status callback function for the archiver when a @@ -225,9 +235,9 @@ func (b *Backup) CompleteItem(item string, previous, current *restic.Node, s arc if current == nil { // error occurred, tell the status display to remove the line - b.workerCh <- fileWorkerMessage{ - filename: item, - done: true, + select { + case b.workerCh <- fileWorkerMessage{filename: item, done: true}: + case <-b.closed: } return } @@ -236,13 +246,19 @@ func (b *Backup) CompleteItem(item string, previous, current *restic.Node, s arc switch current.Type { case "file": - b.processedCh <- counter{Files: 1} - b.workerCh <- fileWorkerMessage{ - filename: item, - done: true, + select { + case b.processedCh <- counter{Files: 1}: + case <-b.closed: + } + select { + case b.workerCh <- fileWorkerMessage{filename: item, done: true}: + case <-b.closed: } case "dir": - b.processedCh <- counter{Dirs: 1} + select { + case b.processedCh <- counter{Dirs: 1}: + case <-b.closed: + } } if current.Type == "dir" { @@ -291,10 +307,9 @@ func (b *Backup) CompleteItem(item string, previous, current *restic.Node, s arc } } else if current.Type == "file" { - - b.workerCh <- fileWorkerMessage{ - done: true, - filename: item, + select { + case b.workerCh <- fileWorkerMessage{done: true, filename: item}: + case <-b.closed: } if previous == nil { @@ -345,7 +360,7 @@ func (b *Backup) CompleteItem(item string, previous, current *restic.Node, s arc func (b *Backup) ReportTotal(item string, s archiver.ScanStats) { select { case b.totalCh <- counter{Files: uint64(s.Files), Dirs: uint64(s.Dirs), Bytes: s.Bytes}: - case <-b.finished: + case <-b.closed: } if item == "" { @@ -365,7 +380,11 @@ func (b *Backup) ReportTotal(item string, s archiver.ScanStats) { // Finish prints the finishing messages. func (b *Backup) Finish(snapshotID restic.ID) { - close(b.finished) + select { + case b.finished <- struct{}{}: + case <-b.closed: + } + b.print(summaryOutput{ MessageType: "summary", FilesNew: b.summary.Files.New,