/* compll - A compressed memory allocator

  Copyright (c) 2010 Yoran Heling

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be included
  in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#ifndef COMPLL_NOLIB

#include "compll.h"
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <assert.h>


/* global configuration */

/* minimum length of the data part of a node, setting this too low can badly
 * increase the length of the unused node list and slow down operations of
 * alloc() and free(). Setting this too high would waste memory. Must be at
 * least 6, in order to hold the unused nodes linked list and node size. */
#define MIN_NODE_SIZE 6

unsigned int conf_alignment;     /* in bit count, actual alignment bytes = 2^a */
unsigned int conf_align_mask;    /* the bitmask where the bits-to-align-to are one and the rest is zero */
unsigned int conf_blocksize = 0; /* 0 = library not initialized */
unsigned short conf_ublockcount;
compll_compress_callback conf_compress;
compll_decompress_callback conf_decompress;


/* an "archived" block, every memory block is present in an array of ablock
 * structs. */
struct ablock {
  /* pointer to the compressed block, NULL if it is in an uncompressed state
   * and has been modified. */
  unsigned char *data;
  /* size of the compressed data (only used if data != NULL) */
  unsigned int data_len;
  /* index to the ublocks array, -1 if it's not in an uncompressed state */
  short ublock;
  /* size of the largest unused node (as a multiple of conf_alignment) */
  unsigned short free;
};
/* An unused ablock is identified with data = NULL and ublock = -1 */

/* an uncompressed block, there's a fixed-size array of this */
struct ublock {
  /* pointer to the uncompressed data */
  unsigned char *data;
  /* index to the ablocks array, -1 if it's not used for any block */
  int ablock;
  /* last time this block has been accessed */
  unsigned int lasta;
};

/* a global instruction counter to indicate when a block has last been
 * accessed, will be incremented each time a node is accessed
 * Note: The code should be able to handle an overflow */
unsigned int instruction_count = 0;
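
/* The compression callbacks are called as conf_compress(source, source
 * length, destination, destination size) and conf_decompress(compressed data,
 * compressed length, destination, destination size), with the compress
 * callback returning the number of bytes it wrote. The sketch below only
 * illustrates that contract as inferred from the call sites in block_uslot()
 * and block_load() further down; the authoritative typedefs live in compll.h,
 * and the copy_compress/copy_decompress names are made up for this example. */
#if 0
static unsigned int copy_compress(const unsigned char *src, unsigned int srclen,
                                  unsigned char *dst, unsigned int dstlen) {
  if(srclen > dstlen)
    return dstlen + 1; /* a result larger than the buffer is treated as failure by block_uslot() */
  memcpy(dst, src, srclen);
  return srclen;       /* "compressed" size */
}

static unsigned int copy_decompress(const unsigned char *src, unsigned int srclen,
                                    unsigned char *dst, unsigned int dstlen) {
  memcpy(dst, src, srclen < dstlen ? srclen : dstlen);
  return dstlen;       /* return value is not inspected by block_load() */
}
#endif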
/* the ablocks and ublocks arrays, will be allocated when the library is
 * initialized.
 * ablocks holds all the memory blocks, and its size is variable
 * ublocks has a fixed size of conf_ublockcount */
struct ablock *ablocks = NULL;
struct ublock *ublocks = NULL;

/* number of ablock entries in the ablocks array */
int ablocks_size = 0, ablocks_last = 0;

/* a static buffer used to write temporary compression data to */
unsigned char *compress_buf = NULL;


/* useful macros */

#define min(a,b) ((a)<(b)?(a):(b))
#define max(a,b) ((a)>(b)?(a):(b))

/* #define align(l) ( (l) + ((conf_alignment - ((l) % conf_alignment)) % conf_alignment)) */
#define align(l) ( ((l) + conf_align_mask) & ~conf_align_mask )

/* readable way to access an unsigned short within an (unsigned char *) */
#define datshort(ch) (*(unsigned short *) (ch))

/* macros to access metadata of a node within a memory block, ch = start of node data */
#define node_head(ch)  datshort((ch) - 2)
#define node_foot(ch)  datshort((ch) + ((node_head(ch) >> 1) << conf_alignment) - 2)
#define node_fnext(ch) datshort(ch)
#define node_fprev(ch) datshort((ch) + 2)
#define node_usize(ch) datshort((ch) + ((node_head(ch) >> 1) << conf_alignment) - 4)
#define node_psize(ch) datshort((ch) - 4)

/* update the 'lasta' field in an uncompressed block and increase the
 * instruction counter. */
#define ublock_access(i) (ublocks[i].lasta = ++instruction_count)


/* Find a slot in the uncompressed blocks and optionally re-compress the block
 * that is being replaced. Also increases the last access time for the new block.
 * Returns -1 on error. */
static int block_uslot() {
  int i, cur = 0;
  unsigned int cur_lasta = UINT_MAX, comp_size;
  struct ablock *a;

  /* find new slot */
  for(i=0; i<conf_ublockcount; i++) {
    /* an unused slot can be taken right away */
    if(ublocks[i].ablock < 0) {
      cur = i;
      break;
    }
    /* otherwise prefer the least recently accessed slot */
    if(ublocks[i].lasta < cur_lasta) {
      cur_lasta = ublocks[i].lasta;
      cur = i;
    }
  }

  /* the slot still holds another block, re-compress that one first */
  if(ublocks[cur].ablock >= 0) {
    a = &ablocks[ublocks[cur].ablock];
    /* only compress it if the uncompressed data has been modified */
    if(a->data == NULL) {
      comp_size = conf_compress(ublocks[cur].data, conf_blocksize, compress_buf, conf_blocksize*2);
      if(comp_size > conf_blocksize*2 || (a->data = malloc(comp_size)) == NULL)
        return -1;
      a->data_len = comp_size;
      memcpy(a->data, compress_buf, comp_size);
    }
    /* it's not in an uncompressed state anymore */
    a->ublock = -1;
  }

  /* allocate a new block if this slot didn't have memory allocated yet */
  if(ublocks[cur].data == NULL)
    if((ublocks[cur].data = malloc(conf_blocksize)) == NULL)
      return -1;

  /* increase the access count */
  ublock_access(cur);
  return cur;
}
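
/* Summary of the node layout as implied by the macros above. All offsets are
 * relative to the start of the node data 'ch', with
 * sz = (node_head(ch) >> 1) << conf_alignment being the node size in bytes.
 * Offsets stored inside a block (the free-list links and the first two bytes
 * of the block) are byte offsets divided by the alignment.
 *
 *    ch-4: [2] node_psize - duplicated size of the previous node (only kept
 *                           up to date while that node is unused)
 *    ch-2: [2] node_head  - (size in alignment units << 1) | previous-in-use
 *    ch+0: [2] node_fnext - next node in the unused-node list  (unused nodes only)
 *    ch+2: [2] node_fprev - previous node in that list         (unused nodes only)
 *     ...:     node data
 * ch+sz-4: [2] node_usize - duplicated size of this node       (unused nodes only)
 * ch+sz-2: [2] node_foot  - head of the next node; its lowest bit records
 *                           whether *this* node is in use
 */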
/* create a new empty memory block, returns its ablocks index or -1 on error */
static int block_alloc() {
  int ub,ab,i;

  /* get a new slot in the ublocks */
  if((ub = block_uslot()) < 0)
    return -1;

  /* initialize the data with zeroes */
  memset(ublocks[ub].data, 0, conf_blocksize);

  /* get the first unused slot in the ablocks array. Items in the array are
   * never deallocated or removed and are allocated sequentially. */
  ab = ablocks_last++;

  /* Is the array too small to hold the new item? allocate more memory.
   * The first time the array is allocated it is large enough for 64 blocks,
   * each time it has to be grown the size is multiplied by 1.5 */
  if(ab >= ablocks_size) {
    if(ab == 0) {
      ablocks_size = 64;
      if((ablocks = malloc(ablocks_size * sizeof(struct ablock))) == NULL)
        return -1;
    } else {
      ablocks_size = 3*(ablocks_size>>1);
      if((ablocks = realloc(ablocks, ablocks_size * sizeof(struct ablock))) == NULL)
        return -1;
    }
    /* mark the newly created entries as unused */
    for(i=ab; i<ablocks_size; i++) {
      ablocks[i].data = NULL;
      ablocks[i].ublock = -1;
    }
  }

  /* initialize the new block and link it to its uncompressed slot */
  ablocks[ab].data = NULL;
  ablocks[ab].data_len = 0;
  ablocks[ab].ublock = ub;
  ablocks[ab].free = 0;
  ublocks[ub].ablock = ab;
  return ab;
}


/* make sure the given block is available in uncompressed form, decompressing
 * it if necessary. Returns the ublocks index or -1 on error. */
static int block_load(int ab) {
  int ub;

  /* validate the block index */
  if(ab >= ablocks_size || ab < 0 || (!ablocks[ab].data && ablocks[ab].ublock < 0))
    return -1;

  /* don't do anything if the block is already loaded */
  if(ablocks[ab].ublock >= 0) {
    ublock_access(ablocks[ab].ublock);
    return ablocks[ab].ublock;
  }

  /* get a new slot in the ublocks */
  if((ub = block_uslot()) < 0)
    return -1;

  /* decompress the block and update the references */
  conf_decompress(ablocks[ab].data, ablocks[ab].data_len, ublocks[ub].data, conf_blocksize);
  ablocks[ab].ublock = ub;
  ublocks[ub].ablock = ab;
  return ub;
}


/* invalidate the compressed data of a block */
#define block_dirty(ab) \
  if(ablocks[ab].data) { \
    free(ablocks[ab].data); \
    ablocks[ab].data = NULL; \
  }


/*
 * The following is the result of a brainstorm session on how to store the node
 * size into the 15 bits while taking into account the alignment and the fact
 * that we need to use the size field to determine the location of the next
 * node.
 *
 * alignment = 8, metadata = 2
 * node size = 1: 15,  2: 16,  3: 14
 *
 *   stored size = node data = align(node size)
 *   start of next node metadata = align(stored size + 2) - 2
 *   (wasted bytes per example: 1: 7,  2: 6,  3: 8)
 *
 *    1:                     2:                     3:
 *     6: [ 2 s=16    ]       6: [ 2 s=16    ]       6: [ 2 s=16    ]
 *     8: [16 data    ]       8: [16 data    ]       8: [16 data    ]
 *    24: [ 6 padding ]      24: [ 6 padding ]      24: [ 6 padding ]
 *    30: [ 2 metadata]      30: [ 2 metadata]      30: [ 2 metadata]
 *    32: [.. next    ]      32: [.. next    ]      32: [.. next    ]
 *
 *   stored size = align(node size + 2)
 *   start of next node metadata = node data = stored size - 2
 *   (wasted bytes per example: 1: 7,  2: 6,  3: 0)
 *
 *    1:                     2:                     3:
 *     6: [ 2 s=24    ]       6: [ 2 s=24    ]       6: [ 2 s=16    ]
 *     8: [22 data    ]       8: [22 data    ]       8: [14 data    ]
 *    30: [ 2 metadata]      30: [ 2 metadata]      22: [ 2 metadata]
 *    32: [.. next    ]      32: [.. next    ]      24: [.. next    ]
 *
 * If you were able to figure out what that odd visualisation above is supposed
 * to mean, you'll see that the second method wastes less space and is easier
 * to work with.
 */

/* Layout of a new memory block:
 *
 *  0: [ 2 - offset of first node]
 * fm: [ 2 - size of the first node, previous node = used]
 * fd: [ns - unallocated data (zeros)]
 * fs: [ 2 - ns]
 * lm: [ 2 - next node size = 0, previous node = unused]
 *
 * fm = metadata of first node = fd - 2
 * fd = offset of first node = align(4)
 * ns = size of the first node = floor((blocksize - fd) / alignment) * alignment
 *    = (blocksize - fd) & ~conf_align_mask
 * fs = offset of the duplicated size of the first node = fd+ns-4
 * lm = metadata of last node = fd+ns-2
 *
 * This means that the largest node *contents* within a block can be calculated with:
 *   ((blocksize - fd) & ~conf_align_mask) - 2
 * Assuming an alignment of 16 bytes and assuming worst case, this can be simplified to:
 *   blocksize - 16 - 16 - 2 = blocksize - 34
 */
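
/* Worked example of the layout above (illustration only): with a block size
 * of 1024 bytes and a 4-byte alignment (conf_alignment = 2):
 *   fd = align(4)        = 4     -> datshort(dat) stores 4 >> 2 = 1
 *   fm = fd - 2          = 2     -> head = ((1020 >> 2) << 1) | 1 = 511
 *   ns = (1024 - 4) & ~3 = 1020
 *   fs = fd + ns - 4     = 1020  -> stores 1020 >> 2 = 255
 *   lm = fd + ns - 2     = 1022  -> stays 0 (size 0 = end of block)
 * so the largest node contents this block can hold is 1020 - 2 = 1018 bytes,
 * and ablock.free starts out as 255 (i.e. 1020 bytes). */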
/* Splitting an unused node in two smaller nodes:
 * example has 4 bytes alignment
 *
 *  2: [ 2 st=36   ]  -> unused node
 *  4: [34 datat   ]
 * 38: [ 2 metanext]  -> the next node
 * 40: [.. datanext]
 *
 *   v
 *
 *  2: [ 2 s1=16   ]  -> node is now in use
 *  4: [14 data1   ]
 * 18: [ 2 s2=20   ]  -> s2 = st - s1 (always properly aligned)
 * 20: [18 data2   ]
 * 38: [ 2 metanext]  -> next node, unmodified
 * 40: [.. datanext]
 *
 * A node can be split as long as s2 >= MIN_NODE_SIZE
 */


/* find a memory block which has a node free of at least the specified (packed)
 * size, creating a new block if all others are full. The block will be
 * available in ublocks.
 * returns the ablocks index or -1 on error */
static int block_getfree(short size) {
  int ab, start, length;
  unsigned char *dat;

  /* find a suitable memory block, load it, and return.
   * TODO: this loops through the whole ablocks array in the worst case, which
   * is not really fast. Maybe we'll have to do some caching? */
  for(ab=0; ab<ablocks_last; ab++)
    if(ablocks[ab].free >= size) {
      if(block_load(ab) == -1)
        return -1;
      return ab;
    }

  /* none found, create a new block and initialize the block structure */
  if((ab = block_alloc()) == -1)
    return -1;
  dat = ublocks[ablocks[ab].ublock].data;

  /* the first two bytes contain the offset to the first unused node, this one
   * can be found at offset align(2+2) - 2 bytes for this pointer and 2 for the
   * metadata of the unused node */
  start = align(4);
  datshort(dat) = start >> conf_alignment;

  /* metadata 2 bytes before the start of the unused node, this indicates the
   * length of the node and that the previous node should be considered as "in
   * use" */
  length = (conf_blocksize - start) & ~conf_align_mask;
  datshort(dat+start-2) = ((length >> conf_alignment) << 1) | 1;

  /* the duplicated size of the next node. While this field is not really used
   * in the case of this node, every unused node should have this */
  datshort(dat+start+length-4) = length >> conf_alignment;

  /* the metadata of the last node can be found at (dat+start+length-2)
   * should be 0 (length = 0, not in use), which it already is so we don't have
   * to do anything. */

  ablocks[ab].free = length >> conf_alignment;
  return ab;
}


/* Add the specified node at the correct position in the unused nodes list and
 * update ablock.free if necessary */
static void node_addfree(int ab, int off) {
  unsigned char *dat = ublocks[ablocks[ab].ublock].data;
  unsigned short psize = node_head(dat+off) >> 1;
  int next, cur = 0;

  /* find the node within the free nodes list after which this node should be
   * inserted. (that is, of which the next node is larger than or equal to this
   * one) */
  next = datshort(dat) << conf_alignment;
  while(next) {
    if((node_head(dat+next)>>1) >= psize)
      break;
    cur = next;
    next = node_fnext(dat+cur) << conf_alignment;
  }

  /* if this is the last node in the list (thus the largest free node in this
   * block), update ablock.free */
  if(!next)
    ablocks[ab].free = psize;

  /* update references */
  node_fnext(dat+off) = next ? (next >> conf_alignment) : 0;
  node_fprev(dat+off) = cur ? (cur >> conf_alignment) : 0;
  if(cur)
    node_fnext(dat+cur) = off >> conf_alignment;
  else
    datshort(dat) = off >> conf_alignment;
  if(next)
    node_fprev(dat+next) = off >> conf_alignment;
}


/* removes a node from the unused node list and updates ablock.free if this was
 * the largest node. This function does not touch (meta-)data of the node itself */
static void node_rmfree(int ab, int off) {
  unsigned char *dat = ublocks[ablocks[ab].ublock].data;
  int prev = node_fprev(dat+off) << conf_alignment,
      next = node_fnext(dat+off) << conf_alignment;

  /* update references */
  if(prev)
    node_fnext(dat+prev) = node_fnext(dat+off);
  else
    datshort(dat) = node_fnext(dat+off);
  if(next)
    node_fprev(dat+next) = node_fprev(dat+off);

  /* if this was the largest free node, update ablock.free */
  if(!next)
    ablocks[ab].free = prev ? (node_head(dat+prev)>>1) : 0;
}
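
/* The sketch below is for illustration only and is not part of the library:
 * it walks the unused-node list of a block (which must currently be loaded in
 * ublocks) using the macros and layout described above. The function name and
 * the use of stdio are assumptions made for this example. */
#if 0
#include <stdio.h>

static void debug_dump_freelist(int ab) {
  unsigned char *dat = ublocks[ablocks[ab].ublock].data;
  /* the first two bytes of a block hold the offset (in alignment units) of
   * the smallest unused node; 0 means the list is empty */
  int off = datshort(dat) << conf_alignment;
  while(off) {
    printf("unused node at offset %d, size %d bytes\n",
      off, (node_head(dat+off) >> 1) << conf_alignment);
    off = node_fnext(dat+off) << conf_alignment;
  }
}
#endif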
/* Allocate a node of the specified (packed) size within a memory block.
 * Assumes the block is available uncompressed.
 * Returns the byte offset of the node data within the block. */
static int node_alloc(int ab, unsigned short size) {
  unsigned char *dat = ublocks[ablocks[ab].ublock].data;
  int off, off2 = 0, free;
  unsigned short psize = size << 1;

  /* find the best fitting unused node by looping through the unused nodes
   * linked list */
  off = datshort(dat) << conf_alignment;
  assert(off > 0);
  while(1) {
    if(!node_fnext(dat+off) || node_head(dat+off) >= psize)
      break;
    off = node_fnext(dat+off) << conf_alignment;
  }
  node_foot(dat+off) |= 1;
  node_rmfree(ab, off);
  node_fnext(dat+off) = node_fprev(dat+off) = node_usize(dat+off) = 0;

  /* split this node into another empty node if it is large enough */
  free = ((node_head(dat+off) >> 1) - size) << conf_alignment;
  if(free >= MIN_NODE_SIZE+2) {
    node_head(dat+off) = psize | (node_head(dat+off) & 1);
    off2 = off + (size << conf_alignment);
    node_head(dat+off2) = ((free >> conf_alignment) << 1) | 1;
    node_usize(dat+off2) = free >> conf_alignment;
    node_foot(dat+off2) &= ~1;
    node_addfree(ab, off2);
  }

  return off;
}


compll_t compll_alloc(unsigned int size) {
  int ab, off;
  unsigned short spack;

  /* we can't handle nodes that do not fit within a block
   * (see explanation of the new block layout for the '34') */
  if(size+34 >= conf_blocksize)
    return 0;

  /* increase the size of the node to be at least MIN_NODE_SIZE, and to hold
   * the metadata of the next node and padding bytes to make sure it's properly
   * aligned */
  if(size < MIN_NODE_SIZE)
    size = MIN_NODE_SIZE;
  size = align(size + 2);
  spack = size >> conf_alignment;

  /* find a suitable block */
  if((ab = block_getfree(spack)) == -1)
    return 0;

  /* find/update the node */
  off = node_alloc(ab, spack);
  assert(off >= 0);
  block_dirty(ab);
  return (((compll_t)ab) << 24) + off;
}


void compll_free(compll_t node) {
  int ab = node >> 24, off = node & 0xFFFFFF;
  int ub, off2;
  unsigned char *dat;

  if((ub = block_load(ab)) < 0)
    return;
  dat = ublocks[ub].data;

  /* mark this node as unused in the metadata of the next node and clear its
   * data */
  node_foot(dat+off) &= ~1;
  memset(dat+off, 0, ((node_head(dat+off)>>1) << conf_alignment) - 2);

  /* if the node after this one is unused as well, merge the two */
  off2 = off + ((node_head(dat+off)>>1) << conf_alignment);
  if(node_head(dat+off2) && !(node_foot(dat+off2) & 1)) {
    node_rmfree(ab, off2);
    node_head(dat+off) += node_head(dat+off2);
    node_head(dat+off2) = node_fnext(dat+off2) = node_fprev(dat+off2) = 0;
  }

  node_usize(dat+off) = node_head(dat+off)>>1;
  node_addfree(ab, off);
  block_dirty(ab);
}


const void *compll_read(compll_t node) {
  int ab = node >> 24, off = node & 0xFFFFFF, ub;
  if((ub = block_load(ab)) < 0)
    return NULL;
  return ublocks[ub].data + off;
}


void *compll_write(compll_t node) {
  int ab = node >> 24, off = node & 0xFFFFFF, ub;
  if((ub = block_load(ab)) < 0)
    return NULL;
  block_dirty(ab);
  return ublocks[ub].data + off;
}
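
/* As can be seen in compll_alloc() and the functions above, a compll_t handle
 * packs the ablocks index in the bits above bit 24 and the byte offset of the
 * node data within its block in the lower 24 bits. For example, the node at
 * byte offset 0x40 of block 3 is handle (3 << 24) + 0x40 = 0x03000040. Node
 * data never starts at offset 0 (the first node begins at align(4)), so a
 * valid handle is never 0 and compll_alloc() can use 0 as its error return. */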
int compll_init(unsigned int block_size, unsigned short alignment, unsigned short uncomp_count,
    compll_compress_callback compress_cb, compll_decompress_callback decompress_cb) {
  int i;

  /* already initialized? return error */
  if(conf_blocksize)
    return 1;

  /* the maximum block size this library can currently handle is 512kB.
   * blocks smaller than 1kB simply make no sense. */
  if(block_size > 512*1024 || block_size < 1024)
    return 2;
  conf_blocksize = block_size;

  /* make sure that the alignment makes sense and convert it into a bit count */
  switch(alignment) {
    case  1: alignment = 0; break;
    case  2: alignment = 1; break;
    case  4: alignment = 2; break;
    case  8: alignment = 3; break;
    case 16: alignment = 4; break;
    default: return 3;
  }

  /* increase the alignment so that the block size fits into a 15-bit integer
   * (actually, so that the maximum *node* size fits into a 15-bit integer,
   * the maximum node size is always a few bytes less than the block size) */
  conf_alignment = max(alignment,
    block_size <=  32*1024 ? 0 :
    block_size <=  64*1024 ? 1 :
    block_size <= 128*1024 ? 2 :
    block_size <= 256*1024 ? 3 : 4
  );

  /* set the bitmask */
  conf_align_mask = 0;
  for(i=conf_alignment; i; i--)
    conf_align_mask = (conf_align_mask<<1) | 1;

  /* initialize the uncompressed block array. note that there is strictly speaking
   * no upper bound on the number of blocks to keep in memory uncompressed, but
   * it's a bad idea to set this too high. */
  if(uncomp_count < 1)
    return 4;
  conf_ublockcount = uncomp_count;

  /* allocate the array */
  if((ublocks = malloc(conf_ublockcount * sizeof(struct ublock))) == NULL)
    return 4;

  /* initialize its items */
  for(i=0; i<conf_ublockcount; i++) {
    ublocks[i].data = NULL;
    ublocks[i].ablock = -1;
    ublocks[i].lasta = 0;
  }

  /* remember the callbacks and allocate the temporary buffer that block_uslot()
   * hands to the compress callback (twice the block size) */
  conf_compress = compress_cb;
  conf_decompress = decompress_cb;
  if((compress_buf = malloc(conf_blocksize*2)) == NULL)
    return 4;

  return 0;
}

#endif /* COMPLL_NOLIB */
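
#if 0
/* Example use of the library (a sketch under assumptions, not part of compll
 * itself): initialise 32kB blocks with 8-byte alignment and at most 4
 * uncompressed blocks, using the copy_compress()/copy_decompress() callbacks
 * sketched near the top of this file. compll_init() returns non-zero on
 * error, compll_alloc() returns 0 on error. */
#include <stdio.h>
#include <string.h>

int main(void) {
  compll_t n;

  if(compll_init(32*1024, 8, 4, copy_compress, copy_decompress))
    return 1;

  if(!(n = compll_alloc(64)))
    return 1;

  /* compll_write() loads the block and marks it as modified,
   * compll_read() only loads it */
  strcpy((char *)compll_write(n), "hello, compressed world");
  printf("%s\n", (const char *)compll_read(n));

  compll_free(n);
  return 0;
}
#endif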