summaryrefslogtreecommitdiff
path: root/compll.c
blob: c9f9a6b011f2ad931625135498c1296fbe248aea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562


#include "compll.h"

#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <assert.h>


/* global configuration */

/* Minimum length of the data part of a node. Setting this too low can badly
 * increase the length of the unused node list and slow down the operations of
 * alloc() and free(). Setting this too high wastes memory.
 * Must be at least 2, in order to hold the unused nodes linked list. */
#define MIN_NODE_SIZE 4


/* Node alignment as a bit count; the alignment in bytes is 2^conf_alignment.
 * Packed node sizes and offsets are stored in units of this many bytes. */
unsigned int   conf_alignment;
/* Bitmask with the low conf_alignment bits set and all other bits zero. */
unsigned int   conf_align_mask;
/* Size in bytes of one uncompressed memory block; 0 = library not initialized. */
unsigned int   conf_blocksize = 0;
/* Number of entries in the ublocks array (blocks kept uncompressed). */
unsigned short conf_ublockcount;
/* User-supplied callbacks used to archive and restore a block. */
compll_compress_callback   conf_compress;
compll_decompress_callback conf_decompress;



/* an "archived" block, every memory block is present in an array of ablock structs. */
struct ablock {
  /* Pointer to the compressed copy of the block. NULL if the block is in an
   * uncompressed state and has been modified; block_uslot() then compresses
   * it again when the block is evicted from its ublocks slot. */
  unsigned char *data;
  /* Size in bytes of the compressed data (only meaningful if data != NULL). */
  unsigned int data_len;
  /* Index into the ublocks array, -1 if the block is not currently available
   * in uncompressed form. */
  short ublock;
  /* Packed size of the largest unused node in this block, i.e. its size in
   * bytes shifted right by conf_alignment. */
  unsigned short free;
};
/* An unused ablock is identified with data = NULL and ublock = -1 */


/* an uncompressed block, there's a fixed-size array of this */
struct ublock {
  /* Pointer to the uncompressed data (conf_blocksize bytes), NULL as long as
   * no buffer has been allocated for this slot yet. */
  unsigned char *data;
  /* Index into the ablocks array, -1 if this slot is not used for any block. */
  int ablock;
  /* Value of instruction_count at the last access of this slot, as recorded
   * by ublock_access(). */
  unsigned int lasta;
};


/* A global instruction counter used to record when a block was last accessed;
 * it is incremented by ublock_access() each time a block is accessed.
 * Note: The code should be able to handle an overflow of this counter. */
unsigned int instruction_count = 0;


/* The ablocks and ublocks arrays.
 * ablocks holds one entry per memory block; it is allocated and grown on
 * demand by block_alloc().
 * ublocks has a fixed size of conf_ublockcount entries and is allocated by
 * compll_init(). */
struct ablock *ablocks = NULL;
struct ublock *ublocks = NULL;

/* Number of ablock entries in the ablocks array (used and unused). */
int ablocks_size = 0;

/* A static buffer of 2*conf_blocksize bytes that receives the output of the
 * compression callback before it is copied to its final location. */
unsigned char *compress_buf = NULL;



/* Useful macros. Note that min() and max() evaluate their arguments more than
 * once, so do not pass expressions with side effects. */
#define min(a,b) ((a)<(b)?(a):(b))
#define max(a,b) ((a)>(b)?(a):(b))

/* Round l up to the next multiple of the configured alignment (2^conf_alignment
 * bytes), using the precomputed bitmask. */
/* #define align(l) ( (l) + ((conf_alignment - ((l) % conf_alignment)) % conf_alignment)) */
#define align(l) ( ((l) + conf_align_mask) & ~conf_align_mask )

/* Readable way to access an unsigned short within an (unsigned char *).
 * The result is an lvalue, so it can be read and assigned to. */
#define datshort(ch) (*(unsigned short *) (ch))

/* Macros to access the metadata of a node within a memory block, where
 * ch = start of the node data.
 *   node_head:  the 2 bytes right before the node data. Bits 1..15 hold the
 *               packed node size, bit 0 is set when the previous node is in use.
 *   node_foot:  the 2 bytes at the end of the node, which are the head of the
 *               node that follows it.
 *   node_fnext: first 2 bytes of the data of an unused node: packed offset of
 *               the next node in the unused nodes list, 0 at the end of it. */
#define node_head(ch) datshort((ch) - 2)
#define node_foot(ch) datshort((ch) + ((node_head(ch) >> 1) << conf_alignment) - 2)
#define node_fnext(ch) datshort(ch)

/* Update the 'lasta' field of uncompressed block i and increase the
 * instruction counter. */
#define ublock_access(i) (ublocks[i].lasta = ++instruction_count)


/* Find a slot in the uncompressed blocks and optionally re-compress the block
 * that is being replaced. Also increases the last access time for the new block.
 * Returns -1 on error. */
static int block_uslot() {
  int i, cur = 0;
  unsigned int cur_lasta = UINT_MAX, comp_size;
  struct ablock *a;

  /* find new slot */
  for(i=0; i<conf_ublockcount; i++) {
    /* slot is unused? use it! */
    if(ublocks[i].ablock == -1) {
      cur = i;
      break;
    }
    /* otherwise, keep track of which block has the lowest lasta */
    if(ublocks[i].lasta < cur_lasta) {
      cur = i;
      cur_lasta = ublocks[i].lasta;
    }
  }

  /* archive the old block if the slot was used */
  if(ublocks[cur].ablock != -1) {
    a = &(ablocks[ublocks[cur].ablock]);
    /* if it's not in a compressed state, (re)compress it */
    if(a->data == NULL) {
      comp_size = conf_compress(ublocks[cur].data, conf_blocksize, compress_buf, conf_blocksize*2);
      if(comp_size > conf_blocksize*2 || (a->data = malloc(comp_size)) == NULL)
        return -1;
      a->data_len = comp_size;
      memcpy(a->data, compress_buf, comp_size);
    }
    /* it's not in an uncompressed state anymore */
    a->ublock = -1;
  }

  /* allocate a new block if this slot didn't have memory allocated yet */
  if(ublocks[cur].data == NULL)
    if((ublocks[cur].data = malloc(conf_blocksize)) == NULL)
      return -1;

  /* increase the access count */
  ublock_access(cur);

  return cur;
}


/* create a new empty memory block, returns its ablocks index or -1 on error */
static int block_alloc() {
  int ub,ab,i;

  /* get a new slot in the ublocks */
  if((ub = block_uslot()) < 0)
    return -1;

  /* initialize the data with zeroes */
  memset(ublocks[ub].data, 0, conf_blocksize);

  /* get a new slot in the ablocks array.
   * TODO: looping through the ablocks array can be slow, we could cache the
   *       index to the first unused slot to significantly lower the number of
   *       iterations in normal cases (though the worst case will still be a
   *       full pass through the array) */
  for(ab=0; ab<ablocks_size; ab++)
    if(ablocks[ab].data == NULL && ablocks[ab].ublock == -1)
      break;

  /* no free slot found? increase the ablocks array.
   * The first time the array is allocated it is large anough for 64 blocks,
   * each time it has to be grown the size is multiplied with 1.5 */
  if(ab == ablocks_size) {
    if(ab == 0) {
      ablocks_size = 64;
      if((ablocks = malloc(ablocks_size * sizeof(struct ablock))) == NULL)
        return -1;
    } else {
      ablocks_size = 3*(ablocks_size>>1);
      if((ablocks = realloc(ablocks, ablocks_size * sizeof(struct ablock))) == NULL)
        return -1;
    }
    for(i=ab; i<ablocks_size; i++) {
      ablocks[i].data = NULL;
      ablocks[i].ublock = -1;
    }
  }

  /* update the references */
  ablocks[ab].ublock = ub;
  ublocks[ub].ablock = ab;

  return ab;
}


/* Make sure block `ab` is available in uncompressed form and refresh its
 * access time.
 *
 * If the block already occupies a ublocks slot only the access time is
 * updated. Otherwise a slot is obtained through block_uslot() (which also
 * records the access) and the archived data is decompressed into it.
 *
 * Returns the ublocks index holding the block, or -1 on error. */
static int block_load(int ab) {
  int slot = ablocks[ab].ublock;

  /* fast path: already uncompressed, just touch it */
  if(slot >= 0) {
    ublock_access(slot);
    return slot;
  }

  /* claim an uncompressed slot for this block */
  slot = block_uslot();
  if(slot < 0)
    return -1;

  /* restore the contents from the archived copy */
  conf_decompress(ablocks[ab].data, ablocks[ab].data_len, ublocks[slot].data, conf_blocksize);

  /* link both sides together */
  ublocks[slot].ablock = ab;
  ablocks[ab].ublock = slot;

  return slot;
}


/* Find a memory block whose largest unused node is at least `size` (a packed
 * size, i.e. bytes >> conf_alignment), creating and formatting a fresh block
 * when no existing block qualifies. On success the block is available in
 * ublocks.
 * Returns the ablocks index or -1 on error. */
static int block_getfree(short size) {
  int ab, first, span;
  unsigned char *base;

  /* Scan the existing blocks; the first in-use block with enough room is
   * loaded and returned.
   * TODO: this loops through the whole ablocks array in the worst case, which
   *       is not really fast. Maybe we'll have to do some caching? */
  for(ab=0; ab<ablocks_size; ab++) {
    /* skip entries that do not describe a block */
    if(ablocks[ab].data == NULL && ablocks[ab].ublock == -1)
      continue;
    /* skip blocks whose largest unused node is too small */
    if(ablocks[ab].free < size)
      continue;
    return block_load(ab) == -1 ? -1 : ab;
  }

  /* nothing suitable: start a new block and set up its initial structure */
  ab = block_alloc();
  if(ab == -1)
    return -1;
  base = ublocks[ablocks[ab].ublock].data;

  /* The block starts with a 2-byte packed offset of the first unused node.
   * That node's data lives at align(2+2): 2 bytes for this offset plus 2 bytes
   * for the node's own metadata. */
  first = align(4);
  datshort(base) = first >> conf_alignment;

  /* The node's metadata sits 2 bytes before its data and stores the packed
   * node length with bit 0 set, meaning "the previous node is in use". */
  span = (conf_blocksize - first) & ~conf_align_mask;
  datshort(base+first-2) = ((span >> conf_alignment) << 1) | 1;

  /* The metadata of the terminating node at (base+first+span-2) has to be 0
   * (length 0, previous not in use); the block was zero-filled, so there is
   * nothing to write. */

  ablocks[ab].free = span >> conf_alignment;
  return ab;
}




/*
 * The following is the result of a brainstorm session on how to store the node
 * size into the 15 bits while taking into account the alignment and the fact
 * that we need to use the size field to determine the location of the next
 * node.
 *
 * alignment = 8,  metadata = 2
 * node size = 1: 15, 2: 16, 3: 14
 *
 * stored size = node data = align(node size)
 * start of next node metadata = align(stored size + 2) - 2
 *         1: 7                  2: 6                 3: 8
 *  6: [ 2 s=16    ]      6: [ 2 s=16    ]     6: [ 2 s=16    ]
 *  8: [16 data    ]      8: [16 data    ]     8: [16 data    ]
 * 24: [ 6 padding ]     24: [ 6 padding ]    24: [ 6 padding ]
 * 30: [ 2 metadata]     30: [ 2 metadata]    30: [ 2 metadata]
 * 32: [.. next    ]     32: [.. next    ]    32: [.. next    ]
 *
 * stored size = align(node size + 2)
 * start of next node metadata = node data = stored size - 2
 *         1: 7                  2: 6                 3: 0
 *  6: [ 2 s=24    ]      6: [ 2 s=24    ]     6: [ 2 s=16    ]
 *  8: [22 data    ]      8: [22 data    ]     8: [14 data    ]
 * 30: [ 2 metadata]     30: [ 2 metadata]    22: [ 2 metadata]
 * 32: [.. next    ]     32: [.. next    ]    24: [.. next    ]
 *
 * If you were able to figure out what that odd visualisation above is supposed
 * to mean, you'll see that the second method wastes less space and is easier
 * to work with.
 */

/* Layout of a new memory block:
 *
 *  0: [ 2 - offset of first node]
 * fm: [ 2 - size of the first node, previous node = used]
 * fd: [ns - unallocated data]
 * lm: [ 2 - next node size = 0, previous node = unused]
 *
 * fm = metadata of first node = fd - 2
 * fd = offset of first node   = align(4)
 * ns = size of the first node = floor((blocksize - fd) / alignment) * alignment = (blocksize - fd) & ~conf_align_mask
 * lm = metadata of last node  = fd+ns-2
 *
 * This means that the largest node *contents* within a block can be calculated with:
 *   ((blocksize - fd) & ~conf_align_mask) - 2
 * Assuming an alignment of 16 bytes and assuming worst case, this can be simplified to:
 *   blocksize - 16 - 16 - 2 = blocksize - 34
 */

/* Splitting an unused node in two smaller nodes:
 * example has 4 bytes alignment
 *
 *  2: [ 2 st=36   ] -> unused node
 *  4: [34 datat   ]
 * 38: [ 2 metanext] -> the next node
 * 40: [.. datanext]
 *
 *          v
 *
 *  2: [ 2 s1=16   ] -> node is now in use
 *  4: [14 data1   ]
 * 18: [ 2 s2=20   ] -> s2 = st - s1 (always properly aligned)
 * 20: [18 data2   ]
 * 38: [ 2 metanext] -> next node, unmodified
 * 40: [.. datanext]
 *
 * A node can be split as long as s2 >= MIN_NODE_SIZE
 */



/* Add the unused node whose data starts at byte offset `off` to the unused
 * nodes list of block `ab`, and update ablock.free if necessary.
 *
 * The list is kept sorted by ascending node size: the node is inserted before
 * the first list entry that is at least as large, which may be the current
 * head of the list. If the node ends up last in the list it is the largest
 * unused node of the block and ablock.free is set to its packed size.
 *
 * `off` is a byte offset and can exceed 65535 for blocks larger than 64 KiB,
 * hence the wide parameter type. The node's head must already hold its size.
 * Assumes the block is available uncompressed. */
static void node_addfree(int ab, unsigned int off) {
  unsigned char *dat = ublocks[ablocks[ab].ublock].data;
  /* packed size of the new node, without the "previous in use" flag bit */
  unsigned short size = node_head(dat+off) >> 1;
  unsigned int prev = 0, cur;

  /* walk the list until `cur` is the first node that is not smaller than the
   * new one (or 0 at the end of the list); `prev` trails one node behind and
   * stays 0 when the new node has to become the head */
  cur = datshort(dat) << conf_alignment;
  while(cur && (node_head(dat+cur) >> 1) < size) {
    prev = cur;
    cur = node_fnext(dat+cur) << conf_alignment;
  }

  /* if this is the last node in the list (thus the largest free node in this
   * block), update ablock.free */
  if(!cur)
    ablocks[ab].free = size;

  /* link the node in between prev and cur */
  node_fnext(dat+off) = cur >> conf_alignment;
  if(prev)
    node_fnext(dat+prev) = off >> conf_alignment;
  else
    datshort(dat) = off >> conf_alignment;
}


/* Carve a node of the given packed size (bytes >> conf_alignment) out of the
 * unused nodes of memory block `ab`.
 * The block must be available uncompressed and must have an unused node.
 * Returns the byte offset of the node data within the block. */
static int node_alloc(int ab, unsigned short size) {
  unsigned char *base = ublocks[ablocks[ab].ublock].data;
  /* requested size in the representation used by the node head (size << 1) */
  unsigned short want = size << 1;
  int node, prev = 0, rest, tail;

  /* Walk the unused nodes list until a node that is large enough is found or
   * the end of the list is reached. `prev` remembers the preceding list entry
   * (0 = none) so that its fnext reference can be fixed up below. */
  node = datshort(base) << conf_alignment;
  assert(node > 0);
  while(node_fnext(base+node) && node_head(base+node) < want) {
    prev = node;
    node = node_fnext(base+node) << conf_alignment;
  }

  /* flag the node as used in the head of the node that follows it, and unlink
   * it from the unused nodes list */
  node_foot(base+node) |= 1;
  if(prev)
    node_fnext(base+prev) = node_fnext(base+node);
  else
    datshort(base) = node_fnext(base+node);

  if(node_fnext(base+node))
    /* clear the list pointer so that the node data consists of zeros only */
    node_fnext(base+node) = 0;
  else
    /* this was the last (largest) unused node: its predecessor in the list,
     * if there is one, is now the largest */
    ablocks[ab].free = prev ? (node_head(base+prev)>>1) : 0;

  /* number of bytes left over after taking `size` from this node */
  rest = ((node_head(base+node) >> 1) - size) << conf_alignment;
  if(rest <= MIN_NODE_SIZE)
    return node;

  /* enough is left over to form a node of its own: shrink this node to the
   * requested size, keeping its "previous in use" flag */
  node_head(base+node) = want | (node_head(base+node) & 1);
  /* the leftover node starts right behind the shrunken one */
  tail = node + (size << conf_alignment);
  /* its head: leftover size, previous node (the one just allocated) in use;
   * the node following it must now see an unused predecessor */
  node_head(base+tail) = ((rest >> conf_alignment) << 1) | 1;
  node_foot(base+tail) &= ~1;
  /* hand the leftover node back to the unused nodes list */
  node_addfree(ab, tail);

  return node;
}



compll_t compll_alloc(unsigned int size) {
  int ab, off;
  unsigned short spack;

  /* we can't handle nodes that do not fit within a block
   * (see explanation of the new block layout for the '34') */
  if(size+34 >= conf_blocksize)
    return 0;

  /* increase the size of the node to be at least MIN_NODE_SIZE, and to hold
   * the metadata of the next node and padding bytes to make sure it's properly
   * aligned */
  if(size < MIN_NODE_SIZE)
    size = MIN_NODE_SIZE;
  size = align(size + 2);
  spack = size >> conf_alignment;

  /* find a suitable block */
  if((ab = block_getfree(spack)) == -1)
    return 0;

  /* find/update the node */
  off = node_alloc(ab, spack);
  assert(off >= 0);

  return (((compll_t)ab) << 24) + off;
}



/* Initialize the library.
 *
 *   block_size     size in bytes of an uncompressed memory block,
 *                  1024 .. 512*1024
 *   alignment      requested node alignment in bytes: 1, 2, 4, 8 or 16. The
 *                  effective alignment may be larger, see below.
 *   uncomp_count   number of blocks to keep in memory uncompressed, >= 1
 *   compress_cb    callback used to compress a block
 *   decompress_cb  callback used to decompress a block
 *
 * Returns 0 on success, or:
 *   1  the library has already been initialized
 *   2  invalid block_size
 *   3  invalid alignment
 *   4  invalid uncomp_count, or out of memory for the uncompressed blocks array
 *   5  a callback is NULL
 *   6  out of memory for the compression buffer
 *
 * All arguments are validated before anything is allocated, and the global
 * configuration is only written once every step has succeeded. A failed call
 * therefore leaks nothing and leaves the library uninitialized, so it can be
 * called again. */
int compll_init(unsigned int   block_size,
                unsigned short alignment,
                unsigned short uncomp_count,
                compll_compress_callback   compress_cb,
                compll_decompress_callback decompress_cb) {
  unsigned int align_bits, min_bits;
  struct ublock *slots;
  unsigned char *buf;
  int i;

  /* already initialized? return error */
  if(conf_blocksize)
    return 1;

  /* the maximum block size this library can currently handle is 512kB.
   * blocks smaller than 1kB simply make no sense. */
  if(block_size > 512*1024 || block_size < 1024)
    return 2;

  /* make sure that the alignment makes sense and convert it into a bit count */
  switch(alignment) {
    case  1: align_bits = 0; break;
    case  2: align_bits = 1; break;
    case  4: align_bits = 2; break;
    case  8: align_bits = 3; break;
    case 16: align_bits = 4; break;
    default: return 3;
  }

  /* note that there is strictly speaking no upper bound on the number of
   * blocks to keep in memory uncompressed, but it's a bad idea to set this
   * too high. */
  if(uncomp_count < 1)
    return 4;

  if(compress_cb == NULL || decompress_cb == NULL)
    return 5;

  /* increase the alignment so that the block size fits into a 15 bits integer
   * (actually, so that the maximum *node* size fits into a 15 bits integer,
   *  the maximum node size is always a few bytes less than the block size) */
  min_bits =
    block_size <=  32*1024 ? 0 :
    block_size <=  64*1024 ? 1 :
    block_size <= 128*1024 ? 2 :
    block_size <= 256*1024 ? 3 : 4;
  if(align_bits < min_bits)
    align_bits = min_bits;

  /* allocate the uncompressed blocks array */
  if((slots = malloc(uncomp_count * sizeof(struct ublock))) == NULL)
    return 4;

  /* allocate the compression buffer, twice the block size */
  if((buf = malloc(block_size*2)) == NULL) {
    free(slots);
    return 6;
  }

  /* all slots start out unused and without a data buffer */
  for(i=0; i<uncomp_count; i++) {
    slots[i].data = NULL;
    slots[i].ablock = -1;
    slots[i].lasta = 0;
  }

  /* nothing can fail anymore: commit the configuration. conf_blocksize is
   * what marks the library as initialized. */
  conf_alignment   = align_bits;
  conf_align_mask  = (1u << align_bits) - 1;
  conf_ublockcount = uncomp_count;
  conf_compress    = compress_cb;
  conf_decompress  = decompress_cb;
  ublocks          = slots;
  compress_buf     = buf;
  conf_blocksize   = block_size;

  return 0;
}





/* debugging code while developing the library */
#ifdef DEBUG

#include <stdio.h>

/* dummy compress/decompress functions */
/* Dummy "compression" for debugging: copies the input to the output unchanged,
 * truncated to the size of the output buffer. Always reports srclen as the
 * resulting size, even if fewer bytes were copied. */
unsigned int dbg_compress(const unsigned char *src, unsigned int srclen, unsigned char *dst, unsigned int dstlen) {
  unsigned int count = dstlen < srclen ? dstlen : srclen;

  memcpy(dst, src, count);
  return srclen;
}
/* Dummy "decompression" for debugging: copies the input to the output
 * unchanged, truncated to the size of the output buffer. */
void dbg_decompress(const unsigned char *src, unsigned int srclen, unsigned char *dst, unsigned int dstlen) {
  unsigned int count = dstlen < srclen ? dstlen : srclen;

  memcpy(dst, src, count);
}


/* Debug driver: initializes the library with 64 KiB blocks, allocates a few
 * small nodes and writes the uncompressed contents of the first block to the
 * file "dump" for inspection.
 * Returns 0 on success and 1 if initialization, an allocation or writing the
 * dump file fails. */
int main() {
  int i, off, rc;
  FILE *f;

  rc = compll_init(1<<16, 8, 10, dbg_compress, dbg_decompress);
  printf("compll_init = %d\n", rc);
  /* nothing below works without a successfully initialized library */
  if(rc != 0)
    return 1;

  for(i=4; i<=16; i+=4) {
    off = compll_alloc(i);
    /* compll_alloc() returns 0 on failure, in which case ablocks[0] may not
     * even exist */
    if(off == 0) {
      fprintf(stderr, "compll_alloc(%d) failed\n", i);
      return 1;
    }
    printf("block_getfree(%d) = %d (free = %d)\n", i, off, ablocks[0].free);
  }

  /* the dump is raw binary data, so open the file in binary mode */
  if((f = fopen("dump", "wb")) == NULL) {
    perror("dump");
    return 1;
  }
  if(fwrite(ublocks[ablocks[0].ublock].data, conf_blocksize, 1, f) != 1) {
    perror("dump");
    fclose(f);
    return 1;
  }
  /* fclose() flushes buffered data, so it can still report a write error */
  if(fclose(f) != 0) {
    perror("dump");
    return 1;
  }

  return 0;
}

#endif