Merge pull request #60 from ThomasWaldmann/chunk-counts

Determine and report chunk counts in the chunks index.
This commit is contained in:
TW 2015-06-20 14:32:19 +02:00
commit 44ec86460b
3 changed files with 21 additions and 8 deletions

View File

@ -361,14 +361,18 @@ hashindex_get_size(HashIndex *index)
}
static void
hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *total_unique_size, long long *total_unique_csize)
hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
long long *total_unique_size, long long *total_unique_csize,
long long *total_unique_chunks, long long *total_chunks)
{
int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0;
int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0, chunks = 0, unique_chunks = 0;
const int32_t *values;
void *key = NULL;
while((key = hashindex_next_key(index, key))) {
values = key + 32;
values = key + index->key_size;
unique_chunks++;
chunks += values[0];
unique_size += values[1];
unique_csize += values[2];
size += values[0] * values[1];
@ -378,4 +382,6 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
*total_csize = csize;
*total_unique_size = unique_size;
*total_unique_csize = unique_csize;
*total_unique_chunks = unique_chunks;
*total_chunks = chunks;
}

View File

@ -11,7 +11,9 @@ cdef extern from "_hashindex.c":
HashIndex *hashindex_read(char *path)
HashIndex *hashindex_init(int capacity, int key_size, int value_size)
void hashindex_free(HashIndex *index)
void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)
void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
long long *unique_size, long long *unique_csize,
long long *total_unique_chunks, long long *total_chunks)
int hashindex_get_size(HashIndex *index)
int hashindex_write(HashIndex *index, char *path)
void *hashindex_get(HashIndex *index, void *key)
@ -179,9 +181,11 @@ cdef class ChunkIndex(IndexBase):
return iter
def summarize(self):
cdef long long total_size, total_csize, unique_size, unique_csize
hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize)
return total_size, total_csize, unique_size, unique_csize
cdef long long total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
hashindex_summarize(self.index, &total_size, &total_csize,
&unique_size, &unique_csize,
&total_unique_chunks, &total_chunks)
return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
cdef class ChunkKeyIterator:

View File

@ -174,11 +174,14 @@ class Statistics:
self.usize += csize
def print_(self, label, cache):
total_size, total_csize, unique_size, unique_csize = cache.chunks.summarize()
total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks = cache.chunks.summarize()
print()
print(' Original size Compressed size Deduplicated size')
print('%-15s %20s %20s %20s' % (label, format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize)))
print('All archives: %20s %20s %20s' % (format_file_size(total_size), format_file_size(total_csize), format_file_size(unique_csize)))
print()
print(' Unique chunks Total chunks')
print('Chunk index: %20d %20d' % (total_unique_chunks, total_chunks))
def show_progress(self, item=None, final=False):
if not final: