/*
 * Copyright (c) Alex Holden <alex@linuxhacker.org> 2000.
 *
 * This code is public domain; you can do whatever you want with it, though I
 * would prefer you to contribute any bug fixes back to me if possible.
 * This software comes with NO WARRANTY WHATSOEVER, not even the implied
 * warranties of merchantability and fitness for a particular purpose.
 *
 * huffman.c: Handles Huffman coding and decoding.
 */

#include <sys/types.h>
#include <stdlib.h>
#include <string.h>

#define HUFFMAN_PRIV
#include "teatotal.h"

#define FIX_PRECISION 9
#include "fixed.h"

/*
 * Allocates a new huffstate structure and output buffer. The buflen argument
 * specifies the size of the input and output buffers (the inbuf element must
 * be set by the caller though). Returns a pointer to the new structure or
 * NULL on error.
 */
huffstate *huff_init(int buflen)
{
	huffstate *ret;

	/* Allocate the huffstate structure */
	if(!(ret = malloc(sizeof(huffstate)))) return NULL;

	/* Allocate the output buffer */
	if(!(ret->outbuf = malloc(buflen))) {
		free(ret);
		return NULL;
	}

	/* Remember the length of the output buffer */
	ret->buflen = buflen;

	/* Initialise the heap */
	if(!(ret->hp = heap_init(257, (heap_comparefunc)compare_huffnodes))) {
		free(ret->outbuf);
		free(ret);
		return NULL;
	}

	/* Clear the input buffer pointer */
	ret->inbuf = NULL;

	/* Return the new structure to the caller */
	return ret;
}

/*
 * Frees a huffstate structure and it's associated output buffer.
 */
void huff_destroy(huffstate *state)
{
	/* Free the output buffer */
	free(state->outbuf);

	/* Destroy the heap */
	heap_destroy(state->hp, 0);

	/* Free the state structure itself */
	free(state);
}

/*
 * Attempts to encode a block of data of the specified length from the input
 * buffer into the output buffer. If the length is below the minimum size at
 * which it is possible to gain any benefit from compressing it, or the output
 * size ends up greater than or equal to the input size, it returns -1 so that
 * the caller can use the uncompressed version of the data instead. Otherwise
 * it returns the length of the output data in bytes. The length must not be
 * larger than the size of the output buffer (specified in the call to the
 * huff_init() function).
 */
int huffenc(huffstate *state, int len)
{
	u8 cd;
	u16 code;
	int i, c, n, codelen, curbit, curbyte;

	/* Check that the block's large enough to try to compress it */
	if(len < HUFF_MIN_ENC) return -1;

	/* Clean up the trees */
	cleantrees(state);

	/* Make the table of scaled character occurrence frequencies */
	make_scaled_freqs(state, len);

	/* Make the huffman tree */
	make_huff_tree(state);

	/* Make the encoding table */
	make_huff_enc_table(state);

	/* Zero the first byte and set the starting position */
	curbyte = 256;
	curbit = 0;
	state->outbuf[curbyte] = 0;

	/* Go through the input buffer a byte at a time */
	for(i = 0; i <= len; i++) {

		/*
		 * This next section may look ugly, but it's actually quite
		 * highly optimised (necessarily so, as it gets called for
		 * every single byte).
		 */

		/* The last word is a magic "end of block" word */
		if(i == len) c = 256;
		else c = state->inbuf[i];

		/* Find the code and code length of this word */
		code = state->nodes[c].code;
		cd = (code >> 8);
		codelen = state->nodes[c].length;

		/* Place as much as we can of the code in the current byte */
		state->outbuf[curbyte] |= cd >> curbit;

		/* If there wasn't enough space in the current byte */
		if((curbit + codelen) > 7) {

			/* Increment to the next byte and check for overflow */
			if(++curbyte >= state->buflen) return -1;

			/* Zero the new byte out */
			state->outbuf[curbyte] = 0;

			/* Shift the code by the amount we've already used */
			n = 8 - curbit;
			code <<= n;
			cd = (code >> 8);
			codelen -= n;
			curbit = 0;

			/* Place as much as we can of the code in this byte */
			state->outbuf[curbyte] |= cd >> curbit;

			/* If there wasn't enough space in the current byte */
			if((curbit + codelen) > 7) {

				/* Increment to next byte */
				if(++curbyte >= state->buflen) return -1;

				/* Zero the new byte out */
				state->outbuf[curbyte] = 0;

				/* Shift the code by the amount we've used */
				n = 8 - curbit;
				code <<= n;
				cd = (code >> 8);
				codelen -= n;
				curbit = 0;

				/* Place the remainder of the code */
				state->outbuf[curbyte] |= cd >> curbit;

			}
		}

		/* Increment the bit position */
		curbit += codelen;
	}

	/* The total number of bytes used is the current byte number + 1 */
	curbyte++;

	/* Fail if the result is bigger than the uncompressed input */
	if(curbyte >= len) return -1;

	/* Write the header */
	for(i = 0; i < 256; i++)
		state->outbuf[i] = state->nodes[i].freq;

	/* Return the number of bytes that were output */
	return curbyte;
}

/*
 * Attempts to decode a block of compressed data. If the decompression fails
 * (eg. because the decompressed data is larger than the output buffer), it
 * returns -1. Otherwise it returns the number of bytes which were decompressed
 * into the output buffer.
 */
int huffdec(huffstate *state, int len)
{
	int outbyte = 0, c = 0, i;
	s32 bitpos, maxbits = len * 8;
	huffnode *n;

	/* Check that the block is at least long enough to contain the header */
	if(len < 256) return -1;

	/* Clean up the trees */
	cleantrees(state);

	/* Load the scaled character occurrence frequencies */
	for(i = 0; i < 256; i++)
		state->nodes[i].freq = state->inbuf[i];

	/* 256 is a special "end of block" character- we always have one */
	state->nodes[256].freq = 1;

	/* Make the huffman tree */
	make_huff_tree(state);

	/* Start after the header */
	bitpos = (256 * 8) - 1;
	while(1) {
		/* Start at the root node */
		n = state->root;
		/* Walk down through the table */
		while(1) {
			/* Go to the next bit */
			if(++bitpos >= maxbits) return -1;

			/* Go right if the next bit is 1; go left if it is 0 */
			if(state->inbuf[bitpos / 8] & (0x80 >> (bitpos % 8)))
				n = n->rc;
			else n = n->lc;

			/* Make sure this node exists */
			if(!n) return -1;

			/* Check if this node is a leaf */
			if(!(n->lc) && !(n->rc)) {
				c = n->value;
				break;
			}
		}
		if(c == 256) break;
		else state->outbuf[outbyte++] = c;
	}

	return outbyte;
}

/*
 * Used to make the table of character occurrence frequencies, scaled down so
 * that they are all small enough to fit in a single byte.
 */
static void make_scaled_freqs(huffstate *state, int len)
{
	int i, n;
	fix sf;

	/* Count the occurrences of each character in the buffer */
	for(i = 0; i < len; i++) state->nodes[state->inbuf[i]].freq++;

	/* Find the largest number of occurrences */
	for(i = 0, n = 0; i < 256; i++)
		if(state->nodes[i].freq > n) n = state->nodes[i].freq;

	/* 256 is a special "end of block" character- we always have one */
	state->nodes[256].freq = 1;

	/* Calculate the scaling factor */
	sf = fixdiv(inttofix(n), inttofix(255));

	/* Scale the values in the table down to an unsigned 8 bit number */
	for(i = 0; i < 256; i++) {
		/* Only scale if the character was used at least once */
		if(state->nodes[i].freq) {
			/* Divide it by the scaling factor */
			state->nodes[i].freq = fixtoint(fixdiv(inttofix(
					state->nodes[i].freq), sf));
			/* Make sure it is at least 1 */
			if(!state->nodes[i].freq) state->nodes[i].freq = 1;
			/* Clip to 255 (not really necessary but...) */
			if(state->nodes[i].freq > 255)
				state->nodes[i].freq = 255;
		}
	}
}

/*
 * Builds the huffman tree from the frequency table. Used by both the encoder
 * and the decoder.
 */
static void make_huff_tree(huffstate *state)
{
	int i;
	huffnode *l, *r, *n = NULL;

	/* Shove the non-zero probability huffnodes into the heap. The
	 * properties of the heap mean that whenever we call heap_get_node()
	 * below, we will get back the next lowest probability node (ie. the
	 * one with the lowest number of occurrences) */
	for(i = 0; i < 257; i++)
		if(state->nodes[i].freq)
			heap_add_node(state->hp, &state->nodes[i]);

	/* Loop around merging the two lowest trees together using the next
	 * free nodule to join them. We know that it always takes alphabetsize
	 * - 1 merges to end up with only the root node remaining. */
	for(i = heap_nodes(state->hp) - 1; i; i--) {
		/* Get the next two lowest nodes */
		l = heap_remove_node(state->hp);
		r = heap_remove_node(state->hp);

		/* Get the next free nodule and cast it to a node */
		n = (huffnode *) &state->nodules[state->nodule++];

		/* Make the two lowest nodes children of the nodule */
		n->lc = l;
		n->rc = r;

		/* Set the nodule freq to the sum of the child node freqs */
		n->freq = l->freq + r->freq;

		/* Push the nodule onto the heap in place of the children */
		heap_add_node(state->hp, n);
	}

	/* Set the root node to the address of the root nodule */
	state->root = n;
}

/*
 * The actual guts of make_huff_enc_table(). Recurses through the tree
 * generating the codes and code lengths from it.
 */
static void makehuffenctab(huffstate *state, huffnode *node, u16 code, u8 len)
{
	/* Check if this is a leaf */
	if(!(node->lc) && !(node->rc)) {
		/* Yes, so set up this node entry and return */
		node->code = code;
		node->length = len;
		return;
	}

	/* Add a bit to the code */
	len++;

	/* Descend down the left branch if there is one (LSB of code = 0) */
	if(node->lc) makehuffenctab(state, node->lc, code, len);

	/* Descend down the right branch if there is one (LSB of code = 1) */
	if(node->rc)
		makehuffenctab(state, node->rc, (code | (0x8000 >> (len - 1))),
									len);
}

/*
 * Compare the probability of two huffnodes. Returns 1 if b is higher than
 * a, or 0 if it is less than or equal to a. Used by the heap to order the
 * heap entries.
 */
static int compare_huffnodes(huffnode *a, huffnode *b)
{
	return(a->freq < b->freq);
}

static void cleantrees(huffstate *state)
{
	int i;

	/* Clear the heap */
	heap_clear(state->hp);

	/* Clear out the binary tree */
	memset(state->nodes, 0, sizeof(state->nodes));

	/* Set up the binary tree character entries */
	for(i = 0; i < 257; i++)
		state->nodes[i].value = i;

	/* Clear the root node pointer */
	state->root = NULL;

	/* Clear the nodules */
	memset(state->nodules, 0, sizeof(state->nodules));
	state->nodule = 0;
}