diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | compll.c | 251 |
2 files changed, 172 insertions, 81 deletions
@@ -4,5 +4,5 @@ compll.o: compll.c compll.h ./compll clean: - rm -f compll.o + rm -f compll.o compll @@ -8,16 +8,14 @@ #include <assert.h> -/* useful macros */ -#define min(a,b) ((a)<(b)?(a):(b)) -#define max(a,b) ((a)>(b)?(a):(b)) - -/* #define align(l) ( (l) + ((conf_alignment - ((l) % conf_alignment)) % conf_alignment)) */ -#define align(l) ( ((l) + conf_align_mask) & ~conf_align_mask ) - +/* global configuration */ +/* minimum lengh of the data part of a node, setting this too low can badly + * increase the length of the unused node list and slow down operations of + * alloc() and free(). Setting this too high would waste memory. + * Must be at least 2, in order to hold the unused nodes linked list. */ +#define MIN_NODE_SIZE 4 -/* global configuration */ unsigned int conf_alignment; /* in bit count, actual alignment bytes = 2^a */ unsigned int conf_align_mask; /* the bitmask where the bits-to-align-to are one and the rest is zero */ @@ -28,7 +26,6 @@ compll_decompress_callback conf_decompress; - /* an "archived" block, every memory block is present in an array of ablock structs. */ struct ablock { /* pointer to the compressed block, NULL if it is in an uncompressed state and has been modified. */ @@ -54,17 +51,6 @@ struct ublock { }; -/* an overlay on the header of a node, makes acessing them easier */ -struct node_head { - /* size & usage information */ - unsigned short head; - /* next and previous nodes in the free nodes list, only used when the node - * itself is unused */ - unsigned short next; - unsigned short prev; -}; - - /* a global instruction counter to indicate when a block has last been * accessed, will be incremented each time a node is accessed * Note: The code should be able to handle an overflow */ @@ -85,6 +71,21 @@ unsigned char *compress_buf = NULL; +/* useful macros */ +#define min(a,b) ((a)<(b)?(a):(b)) +#define max(a,b) ((a)>(b)?(a):(b)) + +/* #define align(l) ( (l) + ((conf_alignment - ((l) % conf_alignment)) % conf_alignment)) */ +#define align(l) ( ((l) + conf_align_mask) & ~conf_align_mask ) + +/* readable way to access an unsigned short within an (unsigned char *) */ +#define datshort(ch) (*(unsigned short *) (ch)) + +/* macros to access metadata of a node within a memory block, ch = start of node data */ +#define node_head(ch) datshort((ch) - 2) +#define node_foot(ch) datshort((ch) + ((node_head(ch) >> 1) << conf_alignment) - 2) +#define node_fnext(ch) datshort(ch) + /* update the 'lasta' field in an uncompressed block and increase the * instruction counter. */ #define ublock_access(i) (ublocks[i].lasta = ++instruction_count) @@ -186,14 +187,17 @@ static int block_alloc() { } -/* Load an archived block into an uncompressed slot, returns the ublocks index - * or -1 on error */ +/* Load an archived block into an uncompressed slot if it's not in an + * uncompressed state already, and updates the access time. returns the ublocks + * index or -1 on error */ static int block_load(int ab) { int ub; /* don't do anything if the block is already loaded */ - if(ablocks[ab].ublock) + if(ablocks[ab].ublock >= 0) { + ublock_access(ablocks[ab].ublock); return ablocks[ab].ublock; + } /* get a new slot in the ublocks */ if((ub = block_uslot()) < 0) @@ -209,6 +213,50 @@ static int block_load(int ab) { } +/* find a memory block which has a node free of at least the specified (packed) + * size, creating a new block if all others are full. The block will be + * available in ublocks. + * returns the ablocks index or -1 on error*/ +static int block_getfree(short size) { + int ab, start, length; + unsigned char *dat; + + /* find a suitable memory block, load it, and return. + * TODO: this loops through the whole ablocks array in the worst case, which + * is not really fast. Maybe we'll have to do some caching? */ + for(ab=0; ab<ablocks_size; ab++) + if((ablocks[ab].data != NULL || ablocks[ab].ublock != -1) && ablocks[ab].free >= size) { + if(block_load(ab) == -1) + return -1; + return ab; + } + + /* none found, create a new block and initialize the block structure */ + if((ab = block_alloc()) == -1) + return -1; + dat = ublocks[ablocks[ab].ublock].data; + + /* the first two bytes contain the offset to the first unused node, this one + * can be found at offset align(2+2) - 2 bytes for this pointer and 2 for the + * metadata of the unused node */ + start = align(4); + datshort(dat) = start >> conf_alignment; + + /* metadata 2 bytes before the start of the unused node, this indicates the + * length of the node and that the previous node should be considered as "in + * use" */ + length = (conf_blocksize - start) & ~conf_align_mask; + datshort(dat+start-2) = ((length >> conf_alignment) << 1) | 1; + + /* the metadata of the last node can be found at (dat+start+length-2) and + * should be 0 (length = 0, not in use), which it already is so we don't have + * to do anything. */ + + ablocks[ab].free = length >> conf_alignment; + return ab; +} + + /* @@ -247,7 +295,7 @@ static int block_load(int ab) { * 0: [ 2 - offset of first node] * fm: [ 2 - size of the first node, previous node = used] * fd: [ns - unallocated data] - * lm: [ 2 - next node size = 0, previous node = used] + * lm: [ 2 - next node size = 0, previous node = unused] * * fm = metadata of first node = fd - 2 * fd = offset of first node = align(4) @@ -260,48 +308,58 @@ static int block_load(int ab) { * blocksize - 16 - 16 - 2 = blocksize - 34 */ +/* Splitting an unused node in two smaller nodes: + * example has 4 bytes alignment + * + * 2: [ 2 st=36 ] -> unused node + * 4: [34 datat ] + * 38: [ 2 metanext] -> the next node + * 40: [.. datanext] + * + * v + * + * 2: [ 2 s1=16 ] -> node is now in use + * 4: [14 data1 ] + * 18: [ 2 s2=20 ] -> s2 = st - s1 (always properly aligned) + * 20: [18 data2 ] + * 38: [ 2 metanext] -> next node, unmodified + * 40: [.. datanext] + * + * A node can be split as long as s2 >= MIN_NODE_SIZE + */ -/* find a memory block which has a node free of at least the specified (packed) - * size, creating a new block if all others are full. The block will be - * available in ublocks. - * returns the ablocks index or -1 on error*/ -static int block_getfree(short size) { - int ab, start, length; - unsigned char *dat; - /* find a suitable memory block, load it, and return. - * TODO: this loops through the whole ablocks array in the worst case, which - * is not really fast. Maybe we'll have to do some caching? */ - for(ab=0; ab<ablocks_size; ab++) - if((ablocks[ab].data != NULL || ablocks[ab].ublock != -1) && ablocks[ab].free >= size) { - if(block_load(ab) == -1) - return -1; - return ab; +/* Add the specified node at the correct position in the unused nodes list and + * update ablock.free if necessary */ +static void node_addfree(int ab, unsigned short off) { + unsigned char *dat = ublocks[ablocks[ab].ublock].data; + unsigned short psize = node_head(dat+off); + int cur; + + /* find the node within the free nodes list after which this node should be + * inserted. (that is, of which the next node is larger than or equal to this + * one) */ + cur = datshort(dat) << conf_alignment; + if(cur) { + while(1) { + if(!node_fnext(dat+cur) || node_head(dat + (node_fnext(dat+cur) << conf_alignment)) >= psize) + break; + cur = node_fnext(dat+cur) << conf_alignment; } + } - /* none found, create a new block and initialize the block structure */ - if((ab = block_alloc()) == -1) - return -1; - dat = ublocks[ablocks[ab].ublock].data; - - /* the first two bytes contain the offset to the first unused node, this one - * can be found at offset align(2+2) - 2 bytes for this pointer and 2 for the - * metadata of the unused node */ - start = align(4); - *((unsigned short *) dat) = start >> conf_alignment; - - /* metadata 2 bytes before the start of the unused node, this indicates the - * length of the node and that the previous node should be considered as "in - * use" */ - length = (conf_blocksize - start) & ~conf_align_mask; - *((unsigned short *) (dat+start-2)) = (length >> conf_alignment) << 1; - - /* and now the metadata of the last node (size=0, inuse=1) */ - *((unsigned short *) (dat+start+length-2)) = 1; - - ablocks[ab].free = length >> conf_alignment; - return ab; + /* if this is the last node in the list (thus the largest free node in this + * block), update ablock.free */ + if(!cur || !node_fnext(dat+cur)) + ablocks[ab].free = psize >> 1; + + /* update references */ + node_fnext(dat+off) = cur ? node_fnext(dat+cur) : 0; + if(cur) + node_fnext(dat+cur) = off >> conf_alignment; + else + datshort(dat) = off >> conf_alignment; } @@ -310,19 +368,50 @@ static int block_getfree(short size) { * Returns the byte offset of the node data within the block.*/ static int node_alloc(int ab, unsigned short size) { unsigned char *dat = ublocks[ablocks[ab].ublock].data; - struct node_head *cur, *next; + int off, off2 = 0, free; unsigned short psize = size << 1; - /* find the best fitting unused node */ - cur = (struct node_head *) (dat - 2 + (*(unsigned short *)dat)); - while(cur->next) { - next = (struct node_head *) (dat - 2 + (cur->next << conf_alignment)); - if(psize > next->head) + /* find the best fitting unused node by looping through the unused nodes + * linked list, while keeping track of the previous node in the list (off2) + * in order to update it's fnext reference */ + off = datshort(dat) << conf_alignment; + assert(off > 0); + while(1) { + if(!node_fnext(dat+off) || node_head(dat+off) >= psize) break; - cur = next; + off2 = off; + off = node_fnext(dat+off) << conf_alignment; } - return -1; + /* mark this node as used and remove it from the free nodes list */ + node_foot(dat+off) |= 1; + if(off2) + node_fnext(dat + off2) = node_fnext(dat+off); + else + datshort(dat) = node_fnext(dat+off); + + /* if this was the largest free node, update ablock.free */ + if(!node_fnext(dat + off)) + ablocks[ab].free = off2 ? (node_head(dat+off2)>>1) : 0; + /* reset the next pointer to make sure the node data contains only zeros */ + else + node_fnext(dat+off) = 0; + + /* split this node into another empty node if it is large enough */ + free = ((node_head(dat+off) >> 1) - size) << conf_alignment; + if(free > MIN_NODE_SIZE) { + /* decrease size of the large node to the specified size */ + node_head(dat+off) = psize | (node_head(dat+off) & 1); + /* calculate offset of the free node */ + off2 = off + (size << conf_alignment); + /* update metadata of this and next node */ + node_head(dat+off2) = ((free >> conf_alignment) << 1) | 1; + node_foot(dat+off2) &= ~1; + /* add to the free nodes list */ + node_addfree(ab, off2); + } + + return off; } @@ -336,11 +425,11 @@ compll_t compll_alloc(unsigned int size) { if(size+34 >= conf_blocksize) return 0; - /* increase the size of the node to be large enough to hold two node pointers - * within the same block (4 bytes), the metadata of the next node and padding - * bytes to make sure it's properly aligned */ - if(size < 4) - size = 4; + /* increase the size of the node to be at least MIN_NODE_SIZE, and to hold + * the metadata of the next node and padding bytes to make sure it's properly + * aligned */ + if(size < MIN_NODE_SIZE) + size = MIN_NODE_SIZE; size = align(size + 2); spack = size >> conf_alignment; @@ -352,7 +441,7 @@ compll_t compll_alloc(unsigned int size) { off = node_alloc(ab, spack); assert(off >= 0); - return off; + return (((compll_t)ab) << 24) + off; } @@ -452,16 +541,18 @@ void dbg_decompress(const unsigned char *src, unsigned int srclen, unsigned char int main() { - int i; + int i, off; FILE *f; - printf("compll_init = %d\n", compll_init(1<<16, 1, 10, dbg_compress, dbg_decompress)); + printf("compll_init = %d\n", compll_init(1<<16, 8, 10, dbg_compress, dbg_decompress)); - i = block_getfree(10); - printf("block_getfree(10) = %d\n", i); + for(i=4; i<=16; i+=4) { + off = compll_alloc(i); + printf("block_getfree(%d) = %d (free = %d)\n", i, off, ablocks[0].free); + } f = fopen("dump", "w"); - fwrite(ublocks[ablocks[i].ublock].data, conf_blocksize, 1, f); + fwrite(ublocks[ablocks[0].ublock].data, conf_blocksize, 1, f); fclose(f); return 0; |