This repository has been archived by the owner on Apr 14, 2024. It is now read-only.
forked from graingert/gitdb-speedups
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path_delta_apply.c
1154 lines (945 loc) · 26.8 KB
/
_delta_apply.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include <_delta_apply.h>
#include <stdint.h>
#include <assert.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
// Short aliases for the unsigned integer types used throughout this file
typedef unsigned long long ull;
typedef unsigned int uint;
typedef unsigned char uchar;
typedef unsigned short ushort;
// Boolean stand-in; functions declared with it return 1 or 0
typedef uchar bool;
// Constants
// Number of DeltaInfo slots DIV_append() requests when the vector is full
const ull gDIV_grow_by = 100;
// DELTA STREAM ACCESS
///////////////////////
inline
ull msb_size(const uchar** datap, const uchar* top)
{
const uchar *data = *datap;
ull cmd, size = 0;
uint i = 0;
do {
cmd = *data++;
size |= (cmd & 0x7f) << i;
i += 7;
} while (cmd & 0x80 && data < top);
*datap = data;
return size;
}
// TOP LEVEL STREAM INFO
/////////////////////////////
// Describes the topmost delta stream and who owns its memory.
// While parent_object is set, TSI_destroy only drops the reference to it;
// otherwise TSI_destroy releases tds with PyMem_Free.
typedef struct {
const uchar *tds; // Toplevel delta stream
const uchar *cstart; // start of the chunks
Py_ssize_t tdslen; // size of tds in bytes
Py_ssize_t target_size; // size of the target buffer which can hold all data
uint num_chunks; // amount of chunks in the delta stream
PyObject *parent_object; // python object tds was obtained from, or NULL
} ToplevelStreamInfo;
// Put info into its empty state: no stream, no parent object, all sizes
// and counts zero.
void TSI_init(ToplevelStreamInfo* info)
{
    const ToplevelStreamInfo blank = {
        .tds = NULL,
        .cstart = NULL,
        .tdslen = 0,
        .target_size = 0,
        .num_chunks = 0,
        .parent_object = 0,
    };
    *info = blank;
}
// Release whatever backs the stream and clear the stream fields.
// With a parent object attached only our reference to it is dropped,
// otherwise the stream memory itself is released with PyMem_Free.
// target_size is left untouched.
void TSI_destroy(ToplevelStreamInfo* info)
{
#ifdef DEBUG
    fprintf(stderr, "TSI_destroy: %p\n", info);
#endif
    if (info->parent_object != NULL) {
        Py_DECREF(info->parent_object);
        info->parent_object = NULL;
    } else if (info->tds != NULL) {
        PyMem_Free((void*)info->tds);
    }

    info->num_chunks = 0;
    info->tdslen = 0;
    info->cstart = NULL;
    info->tds = NULL;
}
// Return a pointer one past the last byte of the toplevel delta stream
inline
const uchar* TSI_end(ToplevelStreamInfo* info)
{
    const uchar* const stream_begin = info->tds;
    return stream_begin + info->tdslen;
}
// Return a pointer to the first chunk of the toplevel delta stream
inline
const uchar* TSI_first(ToplevelStreamInfo* info)
{
    const uchar* const first_chunk = info->cstart;
    return first_chunk;
}
// Attach stream to info and parse its header.
// tdslen has to be set beforehand, as it determines where the stream ends.
// The header consists of two msb-encoded sizes: the base size, which is
// skipped, and the target size, which is stored in info. Afterwards cstart
// points to the first byte behind the header.
inline
void TSI_set_stream(ToplevelStreamInfo* info, const uchar* stream)
{
    info->cstart = info->tds = stream;
    assert(info->tds && info->tdslen);

    const uchar* const stream_end = TSI_end(info);
    (void)msb_size(&info->cstart, stream_end);                  // base size, unused
    info->target_size = msb_size(&info->cstart, stream_end);    // target size
}
// Take a private copy of the stream which so far belonged to the parent
// object, then drop our reference to that object.
// The position of cstart relative to the stream start is preserved.
// Return 1 on success, 0 if the copy could not be allocated (info is left
// unchanged in that case)
bool TSI_copy_stream_from_object(ToplevelStreamInfo* info)
{
    assert(info->parent_object);

    uchar* const copy = PyMem_Malloc(info->tdslen);
    if (copy == NULL) {
        return 0;
    }

    // remember where the chunks start before tds is redirected
    const uint chunk_ofs = (uint)(info->cstart - info->tds);
    memcpy((void*)copy, info->tds, info->tdslen);
    info->tds = copy;
    info->cstart = copy + chunk_ofs;

    Py_DECREF(info->parent_object);
    info->parent_object = 0;
    return 1;
}
// Make info the owner of stream, which is streamlen bytes long. A stream
// owned previously is released with PyMem_Free.
// cstart keeps its offset relative to the stream start. num_chunks is not
// touched and has to be updated by the caller.
void TSI_replace_stream(ToplevelStreamInfo* info, const uchar* stream, uint streamlen)
{
    assert(info->parent_object == 0);

    // needs to be computed while the old pointers are still valid
    const uint chunk_ofs = (uint)(info->cstart - info->tds);
    if (info->tds != NULL) {
        PyMem_Free((void*)info->tds);
    }

    info->tdslen = streamlen;
    info->tds = stream;
    info->cstart = stream + chunk_ofs;
}
// DELTA CHUNK
////////////////
// Internal Delta Chunk Objects
// They are just used to keep information parsed from a stream
// The data pointer is always shared
typedef struct {
ull to; // offset of the chunk in the target buffer
uint ts; // size of the chunk in the target buffer
uint so; // offset into the base buffer to copy from (0 for data chunks)
const uchar* data; // inline bytes of a data chunk, NULL for copy chunks
} DeltaChunk;
// forward declarations
// Parses one chunk from the stream; defined further down in this file
const uchar* next_delta_info(const uchar*, DeltaChunk*);
// Initialize all members of dc.
// to:   offset in the target buffer
// ts:   size in the target buffer
// so:   offset in the base buffer
// data: inline bytes of a data chunk, or NULL for a copy chunk. The pointer
//       is stored as is and not copied. (It was ignored previously, which
//       left dc->data NULL no matter what was passed in.)
inline
void DC_init(DeltaChunk* dc, ull to, ull ts, ull so, const uchar* data)
{
    dc->to = to;
    dc->ts = ts;
    dc->so = so;
    dc->data = data;
}
// Return the target offset one past the last byte covered by dc
inline
ull DC_rbound(const DeltaChunk* dc)
{
    const ull right_bound = dc->to + dc->ts;
    return right_bound;
}
// Debugging aid: write the members of dc to stderr, preceded by prefix
inline
void DC_print(const DeltaChunk* dc, const char* prefix)
{
    const int target_ofs = (int)dc->to;
    fprintf(stderr, "%s-dc: to = %i, ts = %i, so = %i, data = %p\n",
            prefix, target_ofs, dc->ts, dc->so, dc->data);
}
// Apply
// Hand the bytes described by dc to the python callable writer.
// Data chunks pass their inline bytes, copy chunks pass ts bytes starting at
// base + so. The bytes are wrapped into a buffer object, they are not copied.
// tmpargs: a 1-tuple provided by the caller; its slot 0 is replaced by the
//          new buffer object, and the tuple takes over that reference.
// Nothing is returned: on failure a python exception is left set, which the
// caller can detect with PyErr_Occurred().
inline
void DC_apply(const DeltaChunk* dc, const uchar* base, PyObject* writer, PyObject* tmpargs)
{
    const uchar* const src = dc->data ? dc->data : base + dc->so;
    PyObject* buffer = PyBuffer_FromMemory((void*)src, dc->ts);
    if (buffer == NULL) {
        // exception is set already; a NULL slot must not reach the writer
        return;
    }

    // PyTuple_SetItem steals the reference to buffer, even when it fails
    if (PyTuple_SetItem(tmpargs, 0, buffer)) {
        assert(0);
        return;
    }

    // The call yields a new reference (or NULL on error) which was never
    // released before, leaking one object per chunk
    PyObject* result = PyObject_Call(writer, tmpargs, NULL);
    Py_XDECREF(result);
}
// Write the delta stream encoding of (a part of) dc to *pout and advance
// *pout past the bytes written.
// ofs:  offset relative to the start of dc at which the encoded part begins
// size: final size of the encoded part
// The part is assumed to lie within dc; no bounds are verified here.
inline
void DC_encode_to(const DeltaChunk* dc, uchar** pout, uint ofs, uint size)
{
    uchar* cursor = *pout;

    if (dc->data != NULL) {
        // data chunk: the size byte is followed by the literal bytes
        *cursor++ = (uchar)size;
        memcpy(cursor, dc->data + ofs, size);
        cursor += size;
    } else {
        // copy chunk: the command byte comes first, but is written last, once
        // it is known which offset and size bytes are present
        uchar* const cmd_pos = cursor++;
        uchar cmd = 0x80;
        const uint src_ofs = dc->so + ofs;

        // up to four offset bytes, flagged by bits 0x01 .. 0x08
        for (uint n = 0; n < 4; ++n) {
            const uchar byte = (uchar)(src_ofs >> (8 * n));
            if (byte) {
                *cursor++ = byte;
                cmd |= (uchar)(0x01 << n);
            }
        }
        // up to two size bytes, flagged by bits 0x10 and 0x20
        for (uint n = 0; n < 2; ++n) {
            const uchar byte = (uchar)(size >> (8 * n));
            if (byte) {
                *cursor++ = byte;
                cmd |= (uchar)(0x10 << n);
            }
        }
        *cmd_pos = cmd;
    }

    *pout = cursor;
}
// Return the number of bytes the encoding of dc occupies in a delta stream
inline
ushort DC_count_encode_bytes(const DeltaChunk* dc)
{
    if (dc->data) {
        // command byte followed by the literal bytes
        return 1 + dc->ts;
    }

    ushort count = 1;   // command byte
    // one byte for each non-zero byte of the 32 bit source offset
    for (uint shift = 0; shift < 32; shift += 8) {
        if ((dc->so >> shift) & 0xFF) {
            count += 1;
        }
    }
    // one byte for each non-zero byte of the lower 16 bits of the size;
    // a size of 0x10000 is the maximum and needs no size byte at all
    for (uint shift = 0; shift < 16; shift += 8) {
        if ((dc->ts >> shift) & 0xFF) {
            count += 1;
        }
    }
    return count;
}
// DELTA INFO
/////////////
// Index entry which locates one chunk within a delta stream
typedef struct {
uint dso; // delta stream offset, relative to the very start of the stream
uint to; // target offset (cache)
} DeltaInfo;
// DELTA INFO VECTOR
//////////////////////
// Growable array of DeltaInfo entries indexing the chunks of one delta stream
typedef struct {
DeltaInfo *mem; // Memory for delta infos
uint di_last_size; // size of the last element - we can't compute it using the next bound
const uchar *dstream; // borrowed pointer to delta stream we index
Py_ssize_t size; // Amount of DeltaInfos
Py_ssize_t reserved_size; // Reserved amount of DeltaInfos
} DeltaInfoVector;
// Make sure vec can hold at least num_dc entries.
// Nothing happens if the capacity suffices already; otherwise the memory is
// allocated or re-allocated to exactly num_dc entries.
// Running out of memory is fatal (Py_FatalError).
// Return 1 on success
inline
int DIV_reserve_memory(DeltaInfoVector* vec, uint num_dc)
{
    if (num_dc <= vec->reserved_size) {
        return 1;
    }

#ifdef DEBUG
    bool was_null = vec->mem == NULL;
#endif

    const size_t num_bytes = num_dc * sizeof(DeltaInfo);
    vec->mem = (vec->mem == NULL) ? PyMem_Malloc(num_bytes)
                                  : PyMem_Realloc(vec->mem, num_bytes);
    if (vec->mem == NULL) {
        Py_FatalError("Could not allocate memory for append operation");
    }
    vec->reserved_size = num_dc;

#ifdef DEBUG
    const char* format = was_null
        ? "Allocated %i bytes at %p, to hold up to %i chunks\n"
        : "Re-allocated %i bytes at %p, to hold up to %i chunks\n";
    fprintf(stderr, format, (int)(vec->reserved_size * sizeof(DeltaInfo)), vec->mem, (int)vec->reserved_size);
#endif

    return vec->mem != NULL;
}
/*
Raise the capacity of vec by num_dc entries, counted from its currently
reserved size. The work is delegated to DIV_reserve_memory.
Return 1 on success, 0 on failure
*/
inline
int DIV_grow_by(DeltaInfoVector* vec, uint num_dc)
{
    const uint wanted = vec->reserved_size + num_dc;
    return DIV_reserve_memory(vec, wanted);
}
// Put vec into its empty state and reserve room for initial_size entries.
// Return the result of the reservation (1 on success)
int DIV_init(DeltaInfoVector* vec, ull initial_size)
{
    vec->di_last_size = 0;
    vec->reserved_size = 0;
    vec->size = 0;
    vec->dstream = NULL;
    vec->mem = NULL;

    return DIV_grow_by(vec, initial_size);
}
// Return the number of entries stored in vec
inline
Py_ssize_t DIV_len(const DeltaInfoVector* vec)
{
    const Py_ssize_t num_entries = vec->size;
    return num_entries;
}
// Return the target offset of the first entry; vec must not be empty
inline
uint DIV_lbound(const DeltaInfoVector* vec)
{
    assert(vec->size && vec->mem);
    return vec->mem[0].to;
}
// Return a pointer to the entry at index i, which must be below the size
inline
DeltaInfo* DIV_get(const DeltaInfoVector* vec, Py_ssize_t i)
{
    assert(i < vec->size && vec->mem);
    return vec->mem + i;
}
// Return last item
inline
DeltaInfo* DIV_last(const DeltaInfoVector* vec)
{
return DIV_get(vec, vec->size-1);
}
// Return 1 if vec holds no entries, 0 otherwise
inline
int DIV_empty(const DeltaInfoVector* vec)
{
    return 0 == vec->size;
}
// Return a pointer one past the last entry; vec must not be empty
inline
const DeltaInfo* DIV_end(const DeltaInfoVector* vec)
{
    assert(!DIV_empty(vec));
    return &vec->mem[vec->size];
}
// Return a pointer to the first entry; vec must not be empty
inline
DeltaInfo* DIV_first(const DeltaInfoVector* vec)
{
    assert(!DIV_empty(vec));
    return &vec->mem[0];
}
// Return the target offset at which di ends.
// For the last entry this is its offset plus the size stored in the vector,
// for every other entry it is the offset of the entry following it.
inline
uint DIV_info_rbound(const DeltaInfoVector* vec, const DeltaInfo* di)
{
    const int is_last = (di == DIV_last(vec));
    return is_last ? di->to + vec->di_last_size : di[1].to;
}
// Return the size of di in target bytes.
// veclast: pointer to the last entry of vec, provided by the caller so it
//          does not have to be looked up on every call
inline
uint DIV_info_size2(const DeltaInfoVector* vec, const DeltaInfo* di, const DeltaInfo* const veclast)
{
    return (di == veclast) ? vec->di_last_size
                           : di[1].to - di[0].to;
}
// Return the size of di in target bytes, looking up the last entry of vec
inline
uint DIV_info_size(const DeltaInfoVector* vec, const DeltaInfo* di)
{
    const DeltaInfo* const last_entry = DIV_last(vec);
    return DIV_info_size2(vec, di, last_entry);
}
// Release the memory of vec and reset its size and capacity.
// Does nothing if there is no memory to release.
void DIV_destroy(DeltaInfoVector* vec)
{
    if (vec->mem == NULL) {
        return;
    }
#ifdef DEBUG
    fprintf(stderr, "DIV_destroy: %p\n", (void*)vec->mem);
#endif
    PyMem_Free(vec->mem);
    vec->mem = 0;
    vec->reserved_size = 0;
    vec->size = 0;
}
// Drop all entries by setting the size to zero.
// The allocated memory and its contents stay as they are, so the vector can
// be filled anew without re-allocating.
inline
void DIV_forget_members(DeltaInfoVector* vec)
{
    vec->size = 0;
}
// Set the size of vec to zero unless it is zero already.
// The reserved memory is kept for reuse.
inline
void DIV_reset(DeltaInfoVector* vec)
{
    if (vec->size != 0) {
        vec->size = 0;
    }
}
// Add one entry to the end of vec and return a pointer to it.
// The capacity is raised by gDIV_grow_by entries if the vector is full.
// The returned entry is NOT initialized !
inline
DeltaInfo* DIV_append(DeltaInfoVector* vec)
{
    const Py_ssize_t slot = vec->size;
    if (slot + 1 > vec->reserved_size) {
        DIV_grow_by(vec, gDIV_grow_by);
    }
    vec->size = slot + 1;
    // mem has to be read after growing, as it may have been moved
    return vec->mem + slot;
}
// Binary search for the entry which starts at, or contains, the target
// offset ofs. The last entry is returned if the search finds none.
inline
DeltaInfo* DIV_closest_chunk(const DeltaInfoVector* vec, ull ofs)
{
    assert(vec->mem);

    ull low = 0;
    ull high = vec->size;
    while (low < high) {
        const ull probe = (low + high) / 2;
        DeltaInfo* const candidate = vec->mem + probe;

        if (candidate->to > ofs) {
            // candidate starts behind ofs, continue in the lower half
            high = probe;
            continue;
        }

        const uint right_bound = DIV_info_rbound(vec, candidate);
        if (right_bound > ofs || candidate->to == ofs) {
            return candidate;
        }
        // candidate ends before ofs, continue in the upper half
        low = probe + 1;
    }

    return DIV_last(vec);
}
// Compute how many bytes of encoded delta stream a slice of src would occupy.
// The slice covers size target bytes starting at target offset ofs. Chunks
// which are only partially covered are counted as if they were re-encoded
// with their reduced size.
// Return: number of bytes; the number of chunks is not computed here
uint DIV_count_slice_bytes(const DeltaInfoVector* src, uint ofs, uint size)
{
uint num_bytes = 0;
DeltaInfo* cdi = DIV_closest_chunk(src, ofs);
DeltaChunk dc;
DC_init(&dc, 0, 0, 0, NULL);
// partial overlap
// the slice begins inside of cdi: only the part from ofs onwards counts
if (cdi->to != ofs) {
const ull relofs = ofs - cdi->to;
const uint cdisize = DIV_info_size(src, cdi);
// take the remainder of the chunk, but not more than was asked for
const uint max_size = cdisize - relofs < size ? cdisize - relofs : size;
size -= max_size;
// get the size in bytes the info would have
next_delta_info(src->dstream + cdi->dso, &dc);
dc.so += relofs;
dc.ts = max_size;
num_bytes += DC_count_encode_bytes(&dc);
cdi += 1;
if (size == 0){
return num_bytes;
}
}
const DeltaInfo* const vecend = DIV_end(src);
const uchar* nstream;
for( ;cdi < vecend; ++cdi){
nstream = next_delta_info(src->dstream + cdi->dso, &dc);
if (dc.ts < size) {
// chunk fits completely: it occupies as many bytes as it does in the source stream
num_bytes += nstream - (src->dstream + cdi->dso);
size -= dc.ts;
} else {
// final chunk: truncate it to the remaining size and count its new encoding
dc.ts = size;
num_bytes += DC_count_encode_bytes(&dc);
size = 0;
break;
}
}
assert(size == 0);
return num_bytes;
}
// Write a slice as defined by its absolute offset in bytes and its size into the given
// destination memory. The individual chunks written will be a byte copy of the source
// data chunk stream, except for chunks at the borders of the slice which are
// only partially covered - those are re-encoded with adjusted offset and size.
// *dest is advanced past the bytes written.
// Return: number of chunks in the slice
uint DIV_copy_slice_to(const DeltaInfoVector* src, uchar** dest, ull tofs, uint size)
{
assert(DIV_lbound(src) <= tofs);
assert((tofs + size) <= DIV_info_rbound(src, DIV_last(src)));
DeltaChunk dc;
DC_init(&dc, 0, 0, 0, NULL);
DeltaInfo* cdi = DIV_closest_chunk(src, tofs);
uint num_chunks = 0;
// partial overlap
// the slice begins inside of cdi: encode only the part from tofs onwards
if (cdi->to != tofs) {
const uint relofs = tofs - cdi->to;
next_delta_info(src->dstream + cdi->dso, &dc);
// take the remainder of the chunk, but not more than was asked for
const uint max_size = dc.ts - relofs < size ? dc.ts - relofs : size;
size -= max_size;
// adjust dc proportions
DC_encode_to(&dc, dest, relofs, max_size);
num_chunks += 1;
cdi += 1;
if (size == 0){
return num_chunks;
}
}
const uchar* dstream = src->dstream + cdi->dso;
const uchar* nstream = dstream;
// NOTE: the loop has no end-of-stream check of its own. It ends once size is
// used up, or when next_delta_info returns NULL
for( ; nstream; dstream = nstream)
{
num_chunks += 1;
nstream = next_delta_info(dstream, &dc);
if (dc.ts < size) {
// chunk fits completely: copy its encoded bytes verbatim
memcpy(*dest, dstream, nstream - dstream);
*dest += nstream - dstream;
size -= dc.ts;
} else {
// final chunk: re-encode it, truncated to the remaining size
DC_encode_to(&dc, dest, 0, size);
size = 0;
break;
}
}
assert(size == 0);
return num_chunks;
}
// Take slices of div into the corresponding area of the tsi, which is the topmost
// delta to apply.
// Every copy chunk of tsi is replaced by the slice of div it refers to, data
// chunks are taken over unchanged. This is done in two passes: the first one
// computes the size of the resulting stream, the second one writes it into
// newly allocated memory. Finally the stream of tsi is replaced by the new
// one, which has no header, and its chunk count is updated.
// Return 1 on success, 0 if the new stream could not be allocated
bool DIV_connect_with_base(ToplevelStreamInfo* tsi, DeltaInfoVector* div)
{
assert(tsi->num_chunks);
uint num_bytes = 0;
const uchar* data = TSI_first(tsi);
const uchar* dend = TSI_end(tsi);
DeltaChunk dc;
DC_init(&dc, 0, 0, 0, NULL);
// COMPUTE SIZE OF TARGET STREAM
/////////////////////////////////
for (;data < dend;)
{
data = next_delta_info(data, &dc);
// Data chunks don't need processing
if (dc.data){
// command byte + literal bytes
num_bytes += 1 + dc.ts;
continue;
}
num_bytes += DIV_count_slice_bytes(div, dc.so, dc.ts);
}
assert(DC_rbound(&dc) == tsi->target_size);
// GET NEW DELTA BUFFER
////////////////////////
uchar *const dstream = PyMem_Malloc(num_bytes);
if (!dstream){
return 0;
}
// second pass: start over at the first chunk with a fresh dc
data = TSI_first(tsi);
const uchar *ndata = data;
dend = TSI_end(tsi);
uint num_chunks = 0;
uchar* ds = dstream;
DC_init(&dc, 0, 0, 0, NULL);
// pick slices from the delta and put them into the new stream
for (; data < dend; data = ndata)
{
ndata = next_delta_info(data, &dc);
// Data chunks don't need processing
if (dc.data){
// just copy it over
memcpy((void*)ds, (void*)data, ndata - data);
ds += ndata - data;
num_chunks += 1;
continue;
}
// Copy Chunks
num_chunks += DIV_copy_slice_to(div, &ds, dc.so, dc.ts);
}
// both passes must agree on the amount of bytes
assert(ds - dstream == num_bytes);
assert(num_chunks >= tsi->num_chunks);
assert(DC_rbound(&dc) == tsi->target_size);
// finally, replace the streams
TSI_replace_stream(tsi, dstream, num_bytes);
tsi->cstart = dstream; // we have NO header !
assert(tsi->tds == dstream);
tsi->num_chunks = num_chunks;
return 1;
}
// DELTA CHUNK LIST (PYTHON)
/////////////////////////////
// Internally, it has nothing to do with a ChunkList anymore though
// Python object which wraps a single toplevel delta stream
typedef struct {
PyObject_HEAD
// -----------
ToplevelStreamInfo istream; // the stream which apply() walks through
} DeltaChunkList;
// tp_init of DeltaChunkList: takes no positional arguments.
// Return 0 on success, -1 with ValueError set if arguments were given
int DCL_init(DeltaChunkList*self, PyObject *args, PyObject *kwds)
{
    const int has_args = args && PySequence_Size(args) > 0;
    if (has_args) {
        PyErr_SetString(PyExc_ValueError, "Too many arguments");
        return -1;
    }

    TSI_init(&self->istream);
    return 0;
}
// tp_dealloc of DeltaChunkList: release the stream, then the object itself.
void DCL_dealloc(DeltaChunkList* self)
{
    TSI_destroy(&(self->istream));
    // A custom tp_dealloc is responsible for releasing the memory of the
    // object as well; without this call every instance was leaked
    Py_TYPE(self)->tp_free((PyObject*)self);
}
// Python method rbound(): return the target size of the stream as integer
PyObject* DCL_py_rbound(DeltaChunkList* self)
{
    const ToplevelStreamInfo* const stream = &self->istream;
    return PyLong_FromUnsignedLongLong(stream->target_size);
}
// Python method apply(base_buffer, write): write the target data by calling
// write(buf) once per chunk of the stream. Data chunks provide their own
// bytes, copy chunks take them from base_buffer.
// Return None, or NULL with an exception set if
//  - the arguments are invalid,
//  - the base buffer cannot be accessed,
//  - the stream contains an unsupported command,
//  - a copy chunk reaches beyond the end of the base buffer,
//  - the writer raised an exception.
PyObject* DCL_apply(DeltaChunkList* self, PyObject* args)
{
    PyObject* pybuf = 0;
    PyObject* writeproc = 0;
    if (!PyArg_ParseTuple(args, "OO", &pybuf, &writeproc)){
        PyErr_BadArgument();
        return NULL;
    }
    if (!PyObject_CheckReadBuffer(pybuf)){
        PyErr_SetString(PyExc_ValueError, "First argument must be a buffer-compatible object, like a string, or a memory map");
        return NULL;
    }
    if (!PyCallable_Check(writeproc)){
        PyErr_SetString(PyExc_ValueError, "Second argument must be a writer method with signature write(buf)");
        return NULL;
    }

    const uchar* base;
    Py_ssize_t baselen;
    if (PyObject_AsReadBuffer(pybuf, (const void**)&base, &baselen)){
        // exception is set by the call
        return NULL;
    }

    PyObject* tmpargs = PyTuple_New(1);
    if (tmpargs == NULL){
        return NULL;
    }

    PyObject* result = NULL;
    const uchar* data = TSI_first(&self->istream);
    const uchar* const dend = TSI_end(&self->istream);
    DeltaChunk dc;
    DC_init(&dc, 0, 0, 0, NULL);

    while (data < dend){
        data = next_delta_info(data, &dc);
        if (data == NULL){
            // unsupported command, the exception is set by the parser
            goto cleanup;
        }
        // a copy chunk must never read beyond the end of the base buffer
        if (dc.data == NULL && (ull)dc.so + dc.ts > (ull)baselen){
            PyErr_SetString(PyExc_ValueError, "Copy chunk exceeds the bounds of the base buffer");
            goto cleanup;
        }
        DC_apply(&dc, base, writeproc, tmpargs);
        // DC_apply cannot return a status, failures show as pending exception
        if (PyErr_Occurred()){
            goto cleanup;
        }
    }

    Py_INCREF(Py_None);
    result = Py_None;

cleanup:
    Py_DECREF(tmpargs);
    return result;
}
// Method table of DeltaChunkList:
//   apply(base_buffer, write) - see DCL_apply
//   rbound()                  - see DCL_py_rbound
PyMethodDef DCL_methods[] = {
{"apply", (PyCFunction)DCL_apply, METH_VARARGS, "Apply the given iterable of delta streams" },
{"rbound", (PyCFunction)DCL_py_rbound, METH_NOARGS, NULL},
{NULL} /* Sentinel */
};
// Type object of DeltaChunkList. The initializer is positional, so the order
// of the entries must not be changed. Only tp_dealloc, tp_flags, tp_doc,
// tp_methods and tp_init are set here, tp_new is left empty.
PyTypeObject DeltaChunkListType = {
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"DeltaChunkList", /*tp_name*/
sizeof(DeltaChunkList), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)DCL_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT, /*tp_flags*/
"Minimal Delta Chunk List",/* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
DCL_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)DCL_init, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
};
// Create a new, initialized DeltaChunkList instance from C.
// Allocation and initialization have to be done by hand here.
// Return: new reference, or NULL with an exception set if the allocation
// failed. (The failure was only caught by an assert before, which lead to a
// NULL dereference in builds with assertions disabled.)
DeltaChunkList* DCL_new_instance(void)
{
    DeltaChunkList* dcl = (DeltaChunkList*) PyType_GenericNew(&DeltaChunkListType, 0, 0);
    if (dcl == NULL) {
        return NULL;
    }
    DCL_init(dcl, 0, 0);
    return dcl;
}
// Parse the chunk starting at data into dc and return a pointer to the byte
// following it.
// The target offset of dc is advanced by the size dc had before the call.
// Hence the same dc instance has to be passed to consecutive calls, and it
// has to be zero-initialized before the first one.
// A command byte of 0 is not supported: RuntimeError is set and NULL returned.
inline
const uchar* next_delta_info(const uchar* data, DeltaChunk* dc)
{
    const char cmd = *data++;

    if (cmd == 0) {
        PyErr_SetString(PyExc_RuntimeError, "Encountered an unsupported delta cmd: 0");
        assert(0);
        return NULL;
    }

    if (!(cmd & 0x80)) {
        // data chunk: cmd is the amount of literal bytes which follow
        // Just share the data
        dc->to += dc->ts;
        dc->data = data;
        dc->ts = cmd;
        dc->so = 0;
        return data + cmd;
    }

    // copy chunk: the low bits of cmd tell which offset and size bytes exist
    uint cp_off = 0, cp_size = 0;
    // bits 0x01 .. 0x08 flag the four bytes of the offset
    for (uint n = 0; n < 4; ++n) {
        if (cmd & (0x01 << n)) {
            cp_off |= (uint)*data++ << (8 * n);
        }
    }
    // bits 0x10 .. 0x40 flag the three bytes of the size
    // the third one should never get hit with current deltas ...
    for (uint n = 0; n < 3; ++n) {
        if (cmd & (0x10 << n)) {
            cp_size |= (uint)*data++ << (8 * n);
        }
    }
    if (cp_size == 0) {
        cp_size = 0x10000;
    }

    dc->to += dc->ts;
    dc->data = NULL;
    dc->so = cp_off;
    dc->ts = cp_size;
    return data;
}
// Count the chunks encoded between data and dend.
// read_header: if true, data points to the stream header, whose two msb
//              encoded sizes are skipped first. Otherwise data has to point
//              to the first chunk already.
uint compute_chunk_count(const uchar* data, const uchar* dend, bool read_header)
{
    if (read_header) {
        // skip base size and target size
        for (int field = 0; field < 2; ++field) {
            msb_size(&data, dend);
        }
    }

    DeltaChunk dc;
    DC_init(&dc, 0, 0, 0, NULL);

    uint count = 0;
    for (; data < dend; ++count) {
        data = next_delta_info(data, &dc);
    }
    return count;
}
PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
{
// obtain iterator
PyObject* stream_iter = 0;
if (!PyIter_Check(dstreams)){
stream_iter = PyObject_GetIter(dstreams);
if (!stream_iter){
PyErr_SetString(PyExc_RuntimeError, "Couldn't obtain iterator for streams");
return NULL;
}
} else {
stream_iter = dstreams;
}
DeltaInfoVector div;
ToplevelStreamInfo tdsinfo;
TSI_init(&tdsinfo);
DIV_init(&div, 0);
// GET TOPLEVEL DELTA STREAM
int error = 0;
PyObject* ds = 0;
unsigned int dsi = 0; // delta stream index we process
ds = PyIter_Next(stream_iter);
if (!ds){
error = 1;
goto _error;
}
dsi += 1;
tdsinfo.parent_object = PyObject_CallMethod(ds, "read", 0);
if (!PyObject_CheckReadBuffer(tdsinfo.parent_object)){
Py_DECREF(ds);
error = 1;
goto _error;
}
PyObject_AsReadBuffer(tdsinfo.parent_object, (const void**)&tdsinfo.tds, &tdsinfo.tdslen);
if (tdsinfo.tdslen > pow(2, 32)){
// parent object is deallocated by info structure
Py_DECREF(ds);
PyErr_SetString(PyExc_RuntimeError, "Cannot handle deltas larger than 4GB");
tdsinfo.parent_object = 0;
error = 1;
goto _error;
}
Py_DECREF(ds);
// let it officially know, and initialize its internal state
TSI_set_stream(&tdsinfo, tdsinfo.tds);
// INTEGRATE ANCESTOR DELTA STREAMS
for (ds = PyIter_Next(stream_iter); ds != NULL; ds = PyIter_Next(stream_iter), ++dsi)
{
// Its important to initialize this before the next block which can jump
// to code who needs this to exist !
PyObject* db = 0;
// When processing the first delta, we know we will have to alter the tds
// Hence we copy it and deallocate the parent object
if (dsi == 1) {
if (!TSI_copy_stream_from_object(&tdsinfo)){
PyErr_SetString(PyExc_RuntimeError, "Could not allocate memory to copy toplevel buffer");
// info structure takes care of the parent_object
error = 1;
goto loop_end;
}
tdsinfo.num_chunks = compute_chunk_count(tdsinfo.cstart, TSI_end(&tdsinfo), 0);
}
db = PyObject_CallMethod(ds, "read", 0);
if (!PyObject_CheckReadBuffer(db)){
error = 1;
PyErr_SetString(PyExc_RuntimeError, "Returned buffer didn't support the buffer protocol");
goto loop_end;
}
// Fill the stream info structure
const uchar* data;
Py_ssize_t dlen;
PyObject_AsReadBuffer(db, (const void**)&data, &dlen);
const uchar* const dstart = data;
const uchar* const dend = data + dlen;
div.dstream = dstart;
if (dlen > pow(2, 32)){
error = 1;
PyErr_SetString(PyExc_RuntimeError, "Cannot currently handle deltas larger than 4GB");
goto loop_end;
}
// READ HEADER
msb_size(&data, dend);
const ull target_size = msb_size(&data, dend);
DIV_reserve_memory(&div, compute_chunk_count(data, dend, 0));
// parse command stream
DeltaInfo* di = 0; // temporary pointer
DeltaChunk dc;