2010-10-13 20:07:55 +00:00
|
|
|
#include <Python.h>
|
|
|
|
#include <structmember.h>
|
2010-02-28 19:22:45 +00:00
|
|
|
|
2010-11-04 20:19:01 +00:00
|
|
|
/* Smaller of two values. Each argument may be evaluated twice, so avoid side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
|
|
|
/* Absolute value. The argument may be evaluated twice, so avoid side effects. */
#define ABS(v) ((v) < 0 ? (-(v)) : (v))
|
|
|
|
|
2010-03-03 21:52:57 +00:00
|
|
|
/*
 * Fold `len` bytes of `data` into a 32-bit checksum made of two 16-bit
 * running sums packed as (s2 << 16) | s1:
 *
 *   s1 accumulates (byte + 1) for every byte,
 *   s2 accumulates the value of s1 after every byte.
 *
 * `sum` is the checksum to continue from (pass 0 to start fresh); its low
 * 16 bits seed s1 and the next 16 bits seed s2.
 *
 * A `len` of zero or less processes no bytes and returns `sum` reduced to
 * its two 16-bit halves.
 */
static unsigned long int
checksum(const unsigned char *data, int len, unsigned long int sum)
{
    unsigned long int s1 = sum & 0xffff;
    unsigned long int s2 = (sum >> 16) & 0xffff;
    int pos;

    /* A signed index keeps a negative len from being converted to a huge
       unsigned bound, which would walk far past the end of data. */
    for (pos = 0; pos < len; pos++) {
        s1 += data[pos] + 1;
        s2 += s1;
    }

    /* Unsigned wrap-around of s1/s2 is harmless: only the low 16 bits of
       each are kept. */
    return ((s2 & 0xffff) << 16) | (s1 & 0xffff);
}
|
|
|
|
|
|
|
|
/*
 * Slide a checksum window one byte forward.
 *
 * `sum` is the packed (s2 << 16) | s1 checksum of a window of `len` bytes,
 * where s1 is the sum of (byte + 1) and s2 is the sum of the successive s1
 * values. `remove` is the byte leaving the window at its start and `add` is
 * the byte entering at its end.
 *
 * Returns the packed checksum of the shifted window. All arithmetic is done
 * in unsigned long so that neither a large `len` nor an intermediate
 * "negative" value can trigger signed overflow; only the low 16 bits of each
 * half survive, so modular wrap-around does not affect the result.
 */
static unsigned long int
roll_checksum(unsigned long int sum, unsigned char remove, unsigned char add, int len)
{
    const unsigned long int leaving = remove;
    const unsigned long int entering = add;
    unsigned long int s1 = sum & 0xffff;
    unsigned long int s2 = (sum >> 16) & 0xffff;

    /* s1 loses (leaving + 1) and gains (entering + 1); the +1 terms cancel. */
    s1 = s1 - leaving + entering;

    /* The departing byte contributed (leaving + 1) to each of the len
       partial sums in s2; the new s1 is the one partial sum being added. */
    s2 = s2 - (unsigned long int)len * (leaving + 1) + s1;

    return ((s2 & 0xffff) << 16) | (s1 & 0xffff);
}
|
|
|
|
|
2010-02-28 19:22:45 +00:00
|
|
|
typedef struct {
|
2010-03-03 21:52:57 +00:00
|
|
|
PyObject_HEAD
|
2010-11-04 20:19:01 +00:00
|
|
|
int chunk_size, window_size, i, last, eof, done, buf_size, data_len, seed;
|
2010-10-13 20:07:55 +00:00
|
|
|
PyObject *chunks, *fd;
|
|
|
|
unsigned long int sum;
|
2010-03-03 21:52:57 +00:00
|
|
|
unsigned char *data, add, remove;
|
2010-02-28 19:22:45 +00:00
|
|
|
} ChunkifyIter;
|
|
|
|
|
2010-03-03 21:52:57 +00:00
|
|
|
/*
 * tp_iter slot: reset the chunking state and return the iterator itself
 * (as a new reference). The underlying fd object is not touched.
 */
static PyObject*
ChunkifyIter_iter(PyObject *self)
{
    ChunkifyIter *it = (ChunkifyIter *)self;

    /* Buffer positions */
    it->i = 0;
    it->last = 0;
    it->data_len = 0;

    /* Checksum and completion flags */
    it->sum = 0;
    it->eof = 0;
    it->done = 0;

    Py_INCREF(self);
    return self;
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
ChunkifyIter_dealloc(PyObject *self)
|
2010-02-28 19:22:45 +00:00
|
|
|
{
|
2010-03-03 21:52:57 +00:00
|
|
|
ChunkifyIter *c = (ChunkifyIter *)self;
|
|
|
|
Py_DECREF(c->fd);
|
|
|
|
free(c->data);
|
|
|
|
self->ob_type->tp_free(self);
|
2010-02-28 19:22:45 +00:00
|
|
|
}
|
|
|
|
|
2010-03-03 21:52:57 +00:00
|
|
|
/*
 * tp_iternext slot: return the next chunk as a Python string, or NULL with
 * StopIteration set when the input is exhausted.
 *
 * Input is pulled from c->fd.read() into c->data as needed. A checksum over
 * a window of c->window_size bytes is advanced byte by byte; a chunk is cut
 * whenever (sum % chunk_size) == seed, or when the buffer fills up while the
 * pending chunk still starts within the first window_size bytes of it.
 *
 * Errors raised by read(), a non-string result from read(), or a result
 * longer than requested are reported as Python exceptions (NULL return).
 */
static PyObject*
ChunkifyIter_iternext(PyObject *self)
{
    ChunkifyIter *c = (ChunkifyIter *)self;
    /* Bytes that must be summed from scratch before rolling can start.
       Reset to a full window on every call. */
    int initial = c->window_size;

    if(c->done)
    {
        PyErr_SetNone(PyExc_StopIteration);
        return NULL;
    }
    for(;;)
    {
        if(c->i == c->buf_size)
        {
            /* Buffer exhausted: discard everything before the window_size - 1
               bytes that precede the pending chunk start, and shift the
               remainder to the front to make room for more input. */
            int diff = c->last + 1 - c->window_size;
            assert(diff >= 0);
            memmove(c->data, c->data + diff, c->buf_size - diff);
            c->i -= diff;
            c->last -= diff;
            c->data_len -= diff;
            assert(c->i >= 0);
            assert(c->last >= -1);
            assert(c->data_len >= 0);
        }
        if(c->i == c->data_len)
        {
            /* All buffered bytes consumed: ask fd for enough to fill the buffer. */
            int room = c->buf_size - c->data_len;
            Py_ssize_t n;
            PyObject *data = PyObject_CallMethod(c->fd, "read", "i", room);

            if(data == NULL)
            {
                /* read() raised; propagate its exception. */
                return NULL;
            }
            /* PyString_Size returns -1 and sets TypeError for non-strings. */
            n = PyString_Size(data);
            if(n < 0)
            {
                Py_DECREF(data);
                return NULL;
            }
            if(n > room)
            {
                /* Copying this would overflow c->data. */
                Py_DECREF(data);
                PyErr_SetString(PyExc_ValueError,
                                "read() returned more data than requested");
                return NULL;
            }
            memcpy(c->data + c->data_len, PyString_AsString(data), n);
            c->data_len += (int)n;
            Py_DECREF(data);
        }
        if(c->i == c->data_len)
        {
            /* read() produced nothing: end of input. Emit whatever is left
               of the pending chunk, if anything. */
            if(c->last < c->i) {
                c->done = 1;
                return PyString_FromStringAndSize((char *)(c->data + c->last),
                                                  c->data_len - c->last);
            }
            PyErr_SetNone(PyExc_StopIteration);
            return NULL;
        }
        if(initial)
        {
            /* Prime the checksum with up to a full window of available bytes.
               NOTE(review): if the window straddles two reads, the second
               pass restarts from a sum of 0 rather than continuing - confirm
               this is intended before changing it, since it affects where
               chunk boundaries fall. */
            int bytes = MIN(initial, c->data_len - c->i);
            initial -= bytes;
            c->sum = checksum(c->data + c->i, bytes, 0);
            c->i += bytes;
        }
        else
        {
            /* Slide the window forward by one byte. */
            c->sum = roll_checksum(c->sum,
                                   c->data[c->i - c->window_size],
                                   c->data[c->i],
                                   c->window_size);
            c->i++;
        }
        if((c->sum % c->chunk_size) == c->seed ||
           (c->i == c->buf_size && c->last <= c->window_size))
        {
            /* Boundary found (or forced because the buffer is full):
               emit data[old_last .. i) and start the next chunk at i. */
            int old_last = c->last;
            c->last = c->i;
            return PyString_FromStringAndSize((char *)(c->data + old_last),
                                              c->last - old_last);
        }
    }
}
|
|
|
|
|
|
|
|
static PyTypeObject ChunkifyIterType = {
|
|
|
|
PyObject_HEAD_INIT(NULL)
|
|
|
|
0, /*ob_size*/
|
|
|
|
"_chunkifier._ChunkifyIter", /*tp_name*/
|
|
|
|
sizeof(ChunkifyIter), /*tp_basicsize*/
|
|
|
|
0, /*tp_itemsize*/
|
2010-03-03 21:52:57 +00:00
|
|
|
ChunkifyIter_dealloc, /*tp_dealloc*/
|
2010-02-28 19:22:45 +00:00
|
|
|
0, /*tp_print*/
|
|
|
|
0, /*tp_getattr*/
|
|
|
|
0, /*tp_setattr*/
|
|
|
|
0, /*tp_compare*/
|
|
|
|
0, /*tp_repr*/
|
|
|
|
0, /*tp_as_number*/
|
|
|
|
0, /*tp_as_sequence*/
|
|
|
|
0, /*tp_as_mapping*/
|
|
|
|
0, /*tp_hash */
|
|
|
|
0, /*tp_call*/
|
|
|
|
0, /*tp_str*/
|
|
|
|
0, /*tp_getattro*/
|
|
|
|
0, /*tp_setattro*/
|
|
|
|
0, /*tp_as_buffer*/
|
|
|
|
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
|
|
|
|
/* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
|
|
|
|
use tp_iter and tp_iternext fields. */
|
|
|
|
"", /* tp_doc */
|
|
|
|
0, /* tp_traverse */
|
|
|
|
0, /* tp_clear */
|
|
|
|
0, /* tp_richcompare */
|
|
|
|
0, /* tp_weaklistoffset */
|
|
|
|
ChunkifyIter_iter, /* tp_iter: __iter__() method */
|
|
|
|
ChunkifyIter_iternext /* tp_iternext: next() method */
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * chunkify(fd, chunk_size, window_size, seed) -> iterator
 *
 * Create a ChunkifyIter that reads from `fd` (any object with a read(n)
 * method) and yields chunks cut where the rolling checksum over
 * `window_size` bytes satisfies (sum % chunk_size) == (seed % chunk_size).
 *
 * Raises ValueError if chunk_size is not positive or window_size is outside
 * 1..buffer size, and MemoryError if the input buffer cannot be allocated.
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
chunkify(PyObject *self, PyObject *args)
{
    const int buf_size = 10 * 1024 * 1024;
    PyObject *fd;
    int chunk_size, window_size, seed;
    ChunkifyIter *c;

    if (!PyArg_ParseTuple(args, "Oiii", &fd, &chunk_size, &window_size, &seed))
    {
        return NULL;
    }
    /* chunk_size is used as a divisor (here and in the iterator). */
    if (chunk_size <= 0)
    {
        PyErr_SetString(PyExc_ValueError, "chunk_size must be positive");
        return NULL;
    }
    /* The iterator indexes data[i - window_size] and shifts its buffer by
       last + 1 - window_size; both need 1 <= window_size <= buf_size. */
    if (window_size < 1 || window_size > buf_size)
    {
        PyErr_SetString(PyExc_ValueError,
                        "window_size must be between 1 and the buffer size");
        return NULL;
    }
    /* PyObject_New already initialises the object header. */
    if (!(c = PyObject_New(ChunkifyIter, &ChunkifyIterType)))
    {
        return NULL;
    }

    /* Fill in every field up front: PyObject_New does not zero the memory,
       and next() may be called without a preceding iter(). */
    Py_INCREF(fd);
    c->fd = fd;
    c->chunks = NULL;
    c->data = NULL;
    c->buf_size = buf_size;
    c->chunk_size = chunk_size;
    c->window_size = window_size;
    c->seed = seed % chunk_size;
    c->i = 0;
    c->last = 0;
    c->data_len = 0;
    c->eof = 0;
    c->done = 0;
    c->sum = 0;
    c->add = 0;
    c->remove = 0;

    c->data = malloc(c->buf_size);
    if (c->data == NULL)
    {
        /* The deallocator releases fd; free(NULL) there is a no-op. */
        Py_DECREF(c);
        return PyErr_NoMemory();
    }
    return (PyObject *)c;
}
|
2010-03-01 22:39:14 +00:00
|
|
|
|
2010-03-03 21:52:57 +00:00
|
|
|
|
2010-02-28 19:22:45 +00:00
|
|
|
static PyMethodDef ChunkifierMethods[] = {
|
|
|
|
{"chunkify", chunkify, METH_VARARGS, ""},
|
|
|
|
{NULL, NULL, 0, NULL} /* Sentinel */
|
|
|
|
};
|
|
|
|
|
|
|
|
PyMODINIT_FUNC
|
2010-03-01 22:39:14 +00:00
|
|
|
init_speedups(void)
|
2010-02-28 19:22:45 +00:00
|
|
|
{
|
|
|
|
PyObject* m;
|
|
|
|
|
|
|
|
ChunkifyIterType.tp_new = PyType_GenericNew;
|
|
|
|
if (PyType_Ready(&ChunkifyIterType) < 0) return;
|
|
|
|
|
2010-03-01 22:39:14 +00:00
|
|
|
m = Py_InitModule("_speedups", ChunkifierMethods);
|
2010-03-09 21:27:37 +00:00
|
|
|
}
|