/* * Copyright (c) Alex Holden 2000. * * This code is public domain; you can do whatever you want with it, though I * would prefer you to contribute any bug fixes back to me if possible. * This software comes with NO WARRANTY WHATSOEVER, not even the implied * warranties of merchantability and fitness for a particular purpose. * * huffman.c: Handles Huffman coding and decoding. */ #include #include #include #define HUFFMAN_PRIV #include "teatotal.h" #define FIX_PRECISION 9 #include "fixed.h" /* * Allocates a new huffstate structure and output buffer. The buflen argument * specifies the size of the input and output buffers (the inbuf element must * be set by the caller though). Returns a pointer to the new structure or * NULL on error. */ huffstate *huff_init(int buflen) { huffstate *ret; /* Allocate the huffstate structure */ if(!(ret = malloc(sizeof(huffstate)))) return NULL; /* Allocate the output buffer */ if(!(ret->outbuf = malloc(buflen))) { free(ret); return NULL; } /* Remember the length of the output buffer */ ret->buflen = buflen; /* Initialise the heap */ if(!(ret->hp = heap_init(257, (heap_comparefunc)compare_huffnodes))) { free(ret->outbuf); free(ret); return NULL; } /* Clear the input buffer pointer */ ret->inbuf = NULL; /* Return the new structure to the caller */ return ret; } /* * Frees a huffstate structure and it's associated output buffer. */ void huff_destroy(huffstate *state) { /* Free the output buffer */ free(state->outbuf); /* Destroy the heap */ heap_destroy(state->hp, 0); /* Free the state structure itself */ free(state); } /* * Attempts to encode a block of data of the specified length from the input * buffer into the output buffer. If the length is below the minimum size at * which it is possible to gain any benefit from compressing it, or the output * size ends up greater than or equal to the input size, it returns -1 so that * the caller can use the uncompressed version of the data instead. Otherwise * it returns the length of the output data in bytes. The length must not be * larger than the size of the output buffer (specified in the call to the * huff_init() function). */ int huffenc(huffstate *state, int len) { u8 cd; u16 code; int i, c, n, codelen, curbit, curbyte; /* Check that the block's large enough to try to compress it */ if(len < HUFF_MIN_ENC) return -1; /* Clean up the trees */ cleantrees(state); /* Make the table of scaled character occurrence frequencies */ make_scaled_freqs(state, len); /* Make the huffman tree */ make_huff_tree(state); /* Make the encoding table */ make_huff_enc_table(state); /* Zero the first byte and set the starting position */ curbyte = 256; curbit = 0; state->outbuf[curbyte] = 0; /* Go through the input buffer a byte at a time */ for(i = 0; i <= len; i++) { /* * This next section may look ugly, but it's actually quite * highly optimised (necessarily so, as it gets called for * every single byte). */ /* The last word is a magic "end of block" word */ if(i == len) c = 256; else c = state->inbuf[i]; /* Find the code and code length of this word */ code = state->nodes[c].code; cd = (code >> 8); codelen = state->nodes[c].length; /* Place as much as we can of the code in the current byte */ state->outbuf[curbyte] |= cd >> curbit; /* If there wasn't enough space in the current byte */ if((curbit + codelen) > 7) { /* Increment to the next byte and check for overflow */ if(++curbyte >= state->buflen) return -1; /* Zero the new byte out */ state->outbuf[curbyte] = 0; /* Shift the code by the amount we've already used */ n = 8 - curbit; code <<= n; cd = (code >> 8); codelen -= n; curbit = 0; /* Place as much as we can of the code in this byte */ state->outbuf[curbyte] |= cd >> curbit; /* If there wasn't enough space in the current byte */ if((curbit + codelen) > 7) { /* Increment to next byte */ if(++curbyte >= state->buflen) return -1; /* Zero the new byte out */ state->outbuf[curbyte] = 0; /* Shift the code by the amount we've used */ n = 8 - curbit; code <<= n; cd = (code >> 8); codelen -= n; curbit = 0; /* Place the remainder of the code */ state->outbuf[curbyte] |= cd >> curbit; } } /* Increment the bit position */ curbit += codelen; } /* The total number of bytes used is the current byte number + 1 */ curbyte++; /* Fail if the result is bigger than the uncompressed input */ if(curbyte >= len) return -1; /* Write the header */ for(i = 0; i < 256; i++) state->outbuf[i] = state->nodes[i].freq; /* Return the number of bytes that were output */ return curbyte; } /* * Attempts to decode a block of compressed data. If the decompression fails * (eg. because the decompressed data is larger than the output buffer), it * returns -1. Otherwise it returns the number of bytes which were decompressed * into the output buffer. */ int huffdec(huffstate *state, int len) { int outbyte = 0, c = 0, i; s32 bitpos, maxbits = len * 8; huffnode *n; /* Check that the block is at least long enough to contain the header */ if(len < 256) return -1; /* Clean up the trees */ cleantrees(state); /* Load the scaled character occurrence frequencies */ for(i = 0; i < 256; i++) state->nodes[i].freq = state->inbuf[i]; /* 256 is a special "end of block" character- we always have one */ state->nodes[256].freq = 1; /* Make the huffman tree */ make_huff_tree(state); /* Start after the header */ bitpos = (256 * 8) - 1; while(1) { /* Start at the root node */ n = state->root; /* Walk down through the table */ while(1) { /* Go to the next bit */ if(++bitpos >= maxbits) return -1; /* Go right if the next bit is 1; go left if it is 0 */ if(state->inbuf[bitpos / 8] & (0x80 >> (bitpos % 8))) n = n->rc; else n = n->lc; /* Make sure this node exists */ if(!n) return -1; /* Check if this node is a leaf */ if(!(n->lc) && !(n->rc)) { c = n->value; break; } } if(c == 256) break; else state->outbuf[outbyte++] = c; } return outbyte; } /* * Used to make the table of character occurrence frequencies, scaled down so * that they are all small enough to fit in a single byte. */ static void make_scaled_freqs(huffstate *state, int len) { int i, n; fix sf; /* Count the occurrences of each character in the buffer */ for(i = 0; i < len; i++) state->nodes[state->inbuf[i]].freq++; /* Find the largest number of occurrences */ for(i = 0, n = 0; i < 256; i++) if(state->nodes[i].freq > n) n = state->nodes[i].freq; /* 256 is a special "end of block" character- we always have one */ state->nodes[256].freq = 1; /* Calculate the scaling factor */ sf = fixdiv(inttofix(n), inttofix(255)); /* Scale the values in the table down to an unsigned 8 bit number */ for(i = 0; i < 256; i++) { /* Only scale if the character was used at least once */ if(state->nodes[i].freq) { /* Divide it by the scaling factor */ state->nodes[i].freq = fixtoint(fixdiv(inttofix( state->nodes[i].freq), sf)); /* Make sure it is at least 1 */ if(!state->nodes[i].freq) state->nodes[i].freq = 1; /* Clip to 255 (not really necessary but...) */ if(state->nodes[i].freq > 255) state->nodes[i].freq = 255; } } } /* * Builds the huffman tree from the frequency table. Used by both the encoder * and the decoder. */ static void make_huff_tree(huffstate *state) { int i; huffnode *l, *r, *n = NULL; /* Shove the non-zero probability huffnodes into the heap. The * properties of the heap mean that whenever we call heap_get_node() * below, we will get back the next lowest probability node (ie. the * one with the lowest number of occurrences) */ for(i = 0; i < 257; i++) if(state->nodes[i].freq) heap_add_node(state->hp, &state->nodes[i]); /* Loop around merging the two lowest trees together using the next * free nodule to join them. We know that it always takes alphabetsize * - 1 merges to end up with only the root node remaining. */ for(i = heap_nodes(state->hp) - 1; i; i--) { /* Get the next two lowest nodes */ l = heap_remove_node(state->hp); r = heap_remove_node(state->hp); /* Get the next free nodule and cast it to a node */ n = (huffnode *) &state->nodules[state->nodule++]; /* Make the two lowest nodes children of the nodule */ n->lc = l; n->rc = r; /* Set the nodule freq to the sum of the child node freqs */ n->freq = l->freq + r->freq; /* Push the nodule onto the heap in place of the children */ heap_add_node(state->hp, n); } /* Set the root node to the address of the root nodule */ state->root = n; } /* * The actual guts of make_huff_enc_table(). Recurses through the tree * generating the codes and code lengths from it. */ static void makehuffenctab(huffstate *state, huffnode *node, u16 code, u8 len) { /* Check if this is a leaf */ if(!(node->lc) && !(node->rc)) { /* Yes, so set up this node entry and return */ node->code = code; node->length = len; return; } /* Add a bit to the code */ len++; /* Descend down the left branch if there is one (LSB of code = 0) */ if(node->lc) makehuffenctab(state, node->lc, code, len); /* Descend down the right branch if there is one (LSB of code = 1) */ if(node->rc) makehuffenctab(state, node->rc, (code | (0x8000 >> (len - 1))), len); } /* * Compare the probability of two huffnodes. Returns 1 if b is higher than * a, or 0 if it is less than or equal to a. Used by the heap to order the * heap entries. */ static int compare_huffnodes(huffnode *a, huffnode *b) { return(a->freq < b->freq); } static void cleantrees(huffstate *state) { int i; /* Clear the heap */ heap_clear(state->hp); /* Clear out the binary tree */ memset(state->nodes, 0, sizeof(state->nodes)); /* Set up the binary tree character entries */ for(i = 0; i < 257; i++) state->nodes[i].value = i; /* Clear the root node pointer */ state->root = NULL; /* Clear the nodules */ memset(state->nodules, 0, sizeof(state->nodules)); state->nodule = 0; }