mirror of
https://github.com/restic/restic.git
synced 2024-12-26 17:57:50 +00:00
170 lines
4.2 KiB
Go
170 lines
4.2 KiB
Go
package index
|
||
|
||
import (
|
||
"hash/maphash"
|
||
|
||
"github.com/restic/restic/internal/restic"
|
||
)
|
||
|
||
// An indexMap is a chained hash table that maps blob IDs to indexEntries.
|
||
// It allows storing multiple entries with the same key.
|
||
//
|
||
// IndexMap uses some optimizations that are not compatible with supporting
|
||
// deletions.
|
||
//
|
||
// The buckets in this hash table contain only pointers, rather than inlined
|
||
// key-value pairs like the standard Go map. This way, only a pointer array
|
||
// needs to be resized when the table grows, preventing memory usage spikes.
|
||
type indexMap struct {
|
||
// The number of buckets is always a power of two and never zero.
|
||
buckets []*indexEntry
|
||
numentries uint
|
||
|
||
mh maphash.Hash
|
||
|
||
free *indexEntry // Free list.
|
||
}
|
||
|
||
// Tuning parameters for indexMap.
const (
	// growthFactor is the multiplier applied to the number of buckets each
	// time the table grows. It must be a power of 2 so that the number of
	// buckets stays a power of 2.
	growthFactor = 2

	// maxLoad is the number of entries per bucket, averaged over the whole
	// table, at which add grows the table before inserting.
	maxLoad = 4
)
|
||
|
||
// add inserts an indexEntry for the given arguments into the map,
|
||
// using id as the key.
|
||
func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) {
|
||
switch {
|
||
case m.numentries == 0: // Lazy initialization.
|
||
m.init()
|
||
case m.numentries >= maxLoad*uint(len(m.buckets)):
|
||
m.grow()
|
||
}
|
||
|
||
h := m.hash(id)
|
||
e := m.newEntry()
|
||
e.id = id
|
||
e.next = m.buckets[h] // Prepend to existing chain.
|
||
e.packIndex = packIdx
|
||
e.offset = offset
|
||
e.length = length
|
||
e.uncompressedLength = uncompressedLength
|
||
|
||
m.buckets[h] = e
|
||
m.numentries++
|
||
}
|
||
|
||
// foreach calls fn for all entries in the map, until fn returns false.
|
||
func (m *indexMap) foreach(fn func(*indexEntry) bool) {
|
||
for _, e := range m.buckets {
|
||
for e != nil {
|
||
if !fn(e) {
|
||
return
|
||
}
|
||
e = e.next
|
||
}
|
||
}
|
||
}
|
||
|
||
// foreachWithID calls fn for all entries with the given id.
|
||
func (m *indexMap) foreachWithID(id restic.ID, fn func(*indexEntry)) {
|
||
if len(m.buckets) == 0 {
|
||
return
|
||
}
|
||
|
||
h := m.hash(id)
|
||
for e := m.buckets[h]; e != nil; e = e.next {
|
||
if e.id != id {
|
||
continue
|
||
}
|
||
fn(e)
|
||
}
|
||
}
|
||
|
||
// get returns the first entry for the given id.
|
||
func (m *indexMap) get(id restic.ID) *indexEntry {
|
||
if len(m.buckets) == 0 {
|
||
return nil
|
||
}
|
||
|
||
h := m.hash(id)
|
||
for e := m.buckets[h]; e != nil; e = e.next {
|
||
if e.id == id {
|
||
return e
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func (m *indexMap) grow() {
|
||
old := m.buckets
|
||
m.buckets = make([]*indexEntry, growthFactor*len(m.buckets))
|
||
|
||
for _, e := range old {
|
||
for e != nil {
|
||
h := m.hash(e.id)
|
||
next := e.next
|
||
e.next = m.buckets[h]
|
||
m.buckets[h] = e
|
||
e = next
|
||
}
|
||
}
|
||
}
|
||
|
||
func (m *indexMap) hash(id restic.ID) uint {
|
||
// We use maphash to prevent backups of specially crafted inputs
|
||
// from degrading performance.
|
||
// While SHA-256 should be collision-resistant, for hash table indices
|
||
// we use only a few bits of it and finding collisions for those is
|
||
// much easier than breaking the whole algorithm.
|
||
m.mh.Reset()
|
||
_, _ = m.mh.Write(id[:])
|
||
h := uint(m.mh.Sum64())
|
||
return h & uint(len(m.buckets)-1)
|
||
}
|
||
|
||
func (m *indexMap) init() {
|
||
const initialBuckets = 64
|
||
m.buckets = make([]*indexEntry, initialBuckets)
|
||
}
|
||
|
||
// len reports the number of entries stored in the map.
func (m *indexMap) len() uint {
	return m.numentries
}
|
||
|
||
func (m *indexMap) newEntry() *indexEntry {
|
||
// We keep a free list of objects to speed up allocation and GC.
|
||
// There's an obvious trade-off here: allocating in larger batches
|
||
// means we allocate faster and the GC has to keep fewer bits to track
|
||
// what we have in use, but it means we waste some space.
|
||
//
|
||
// Then again, allocating each indexEntry separately also wastes space
|
||
// on 32-bit platforms, because the Go malloc has no size class for
|
||
// exactly 52 bytes, so it puts the indexEntry in a 64-byte slot instead.
|
||
// See src/runtime/sizeclasses.go in the Go source repo.
|
||
//
|
||
// The batch size of 4 means we hit the size classes for 4×64=256 bytes
|
||
// (64-bit) and 4×52=208 bytes (32-bit), wasting nothing in malloc on
|
||
// 64-bit and relatively little on 32-bit.
|
||
const entryAllocBatch = 4
|
||
|
||
e := m.free
|
||
if e != nil {
|
||
m.free = e.next
|
||
} else {
|
||
free := new([entryAllocBatch]indexEntry)
|
||
e = &free[0]
|
||
for i := 1; i < len(free)-1; i++ {
|
||
free[i].next = &free[i+1]
|
||
}
|
||
m.free = &free[1]
|
||
}
|
||
|
||
return e
|
||
}
|
||
|
||
type indexEntry struct {
|
||
id restic.ID
|
||
next *indexEntry
|
||
packIndex int // Position in containing Index's packs field.
|
||
offset uint32
|
||
length uint32
|
||
uncompressedLength uint32
|
||
}
|