Renamed chunker to chunkifier.

2025-02-23 14:41:43 +00:00 · 2010-02-28 16:20:19 +01:00 · 2010-02-28 16:20:19 +01:00 · 6c73f5dc86
commit 6c73f5dc86
parent 0cca830981
2 changed files with 18 additions and 15 deletions
--- a/dedupstore/archiver.py
+++ b/dedupstore/archiver.py
@ -6,7 +6,7 @@
 import cPickle
 from optparse import OptionParser

-from chunker import chunker, checksum
+from chunkifier import chunkify, checksum
 from store import Store


@ -70,12 +70,12 @@ def save(self):

    def add_chunk(self, data):
        sum = checksum(data)
+        data = zlib.compress(data)
        #print 'chunk %d: %d' % (len(data), sum)
-        hash = struct.pack('I', sum) + hashlib.sha1(data).digest()
-        if not self.seen_chunk(hash):
-            zdata = zlib.compress(data)
-            size = len(zdata)
-            self.store.put(NS_CHUNKS, hash, zdata)
+        id = struct.pack('I', sum) + hashlib.sha1(data).digest()
+        if not self.seen_chunk(id):
+            size = len(data)
+            self.store.put(NS_CHUNKS, id, data)
        else:
            size = 0
            #print 'seen chunk', hash.encode('hex')
@ -164,7 +164,7 @@ def verify_archive(self, archive_name):
                print item['path'], '...',
                for chunk in item['chunks']:
                    data = self.store.get(NS_CHUNKS, chunk)
-                    if hashlib.sha1(data).digest() != chunk:
+                    if hashlib.sha1(data).digest() != chunk[4:]:
                        print 'ERROR'
                        break
                else:
@ -184,7 +184,10 @@ def extract_archive(self, archive_name):
            if item['type'] == 'FILE':
                with open(item['path'], 'wb') as fd:
                    for chunk in item['chunks']:
-                        fd.write(zlib.decompress(self.store.get(NS_CHUNKS, chunk)))
+                        data = self.store.get(NS_CHUNKS, chunk)
+                        if hashlib.sha1(data).digest() != chunk[4:]:
+                            raise Exception('Invalid chunk checksum')
+                        fd.write(zlib.decompress(data))

    def process_dir(self, path, cache):
        path = path.lstrip('/\\:')
@ -198,7 +201,7 @@ def process_file(self, path, cache):
            origsize = 0
            compsize = 0
            chunks = []
-            for chunk in chunker(fd, CHUNKSIZE, self.cache.summap):
+            for chunk in chunkify(fd, CHUNKSIZE, self.cache.summap):
                origsize += len(chunk)
                id, size = cache.add_chunk(chunk)
                compsize += size
--- a/dedupstore/chunkifier.py
+++ b/dedupstore/chunkifier.py
@ -28,25 +28,25 @@ def roll_checksum(sum, remove, add, len):
    return (s1 & 0xffff) + ((s2 & 0xffff) << 16)


-def chunker(fd, chunk_size, chunks):
+def chunkify(fd, chunk_size, chunks):
    """
    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
-    >>> list(chunker(fd, 4, {}))
+    >>> list(chunkify(fd, 4, {}))
    ['ABCD', 'EFGH', 'IJ', 'KLMN']
    
    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> chunks = {44564754: True} # 'BCDE'
-    >>> list(chunker(fd, 4, chunks))
+    >>> list(chunkify(fd, 4, chunks))
    ['A', 'BCDE', 'FGHI', 'J', 'KLMN']

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> chunks = {44564754: True, 48496938: True} # 'BCDE', 'HIJK'
-    >>> list(chunker(fd, 4, chunks))
+    >>> list(chunkify(fd, 4, chunks))
    ['A', 'BCDE', 'FG', 'HIJK', 'LMN']

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> chunks = {43909390: True, 50463030: True} # 'ABCD', 'KLMN'
-    >>> list(chunker(fd, 4, chunks))
+    >>> list(chunkify(fd, 4, chunks))
    ['ABCD', 'EFGH', 'IJ', 'KLMN']
    """
    data = 'X' + fd.read(chunk_size * 3)
@ -62,7 +62,7 @@ def chunker(fd, chunk_size, chunks):
        if len(data) - i <= chunk_size:  # EOF?
            if len(data) > chunk_size + 1:
                yield data[1:len(data) - chunk_size]
-                yield data[:chunk_size]
+                yield data[-chunk_size:]
            else:
                yield data[1:]
            return