PackIt huff.c

/*		Huffman Encoding

	This module performs Huffman encoding and decoding.

	Initial coding 2/8/86 by Harry R. Chesley.

	© Copyright 1986 by Harry R. Chesley.
	All rights reserved.
*/

#include <types.h>

#define NIL 0L

/*#define DEBUG*/

struct treeNode {
	struct treeNode *parent;		/* The parent of this node (or NIL if at the top). */
	char terminal;					/* TRUE if this is a terminal node. */
	char marked;					/* TRUE if we've compressed this node already. */
	long count;						/* Count of times this node appears in the source. */
	union {
		char value;					/* Value of node if terminal. */
		struct {
			struct treeNode *right;	/* Right branch if non-terminal. */
			struct treeNode *left;	/* Left branch. */
		} br;
	} val;
};

static struct treeNode tree[511];		/* The tree. */

static struct treeNode *nextNode;		/* Pointer to the next free node. */

static struct treeNode *valToTree[256];	/* Pointers to terminal tree nodes; indexed by value. */

static struct treeNode *treeTop;		/* The top of the encoded tree. */

/*	HuffInit()

	Function: Initialize the tree to nothing seen.

	Algorithm: Just set the index to zip.

	Comments: None.
*/

HuffInit()

{
	register struct treeNode **vttPtr;

	nextNode = tree;
	for (vttPtr = valToTree; vttPtr < &valToTree[256]; *vttPtr++ = NIL);
}

/*	HuffCount(blk,sz)

	Function: Count the bytes in the block blk, size sz, as part of the data to be encoded.

	Algorithm: Scan thru the block. For bytes already in the tree, add one to the count, for those
	not in the tree, allocate a node for them.

	Comments: None.
*/

HuffCount(blk,sz)

char *blk;
long sz;

{
	register char *ptr;
	register long cnt;
	register struct treeNode *treePtr;
	register unsigned b;

	for (ptr = blk, cnt = sz; cnt--;) {
		b = ((unsigned) *ptr++) & 0xFF;
		if ((treePtr = valToTree[b]) == NIL) {
			valToTree[b] = treePtr = nextNode;
			treePtr->terminal = TRUE;
			treePtr->marked = FALSE;
			treePtr->count = 1L;
			treePtr->val.value = b;
			nextNode++;
		} else treePtr->count++;
	};
}

/*	HuffCode()

	Function: Take the list of terminal nodes and build a binary tree from them.

	Algorithm: Repeatedly find the two lowest count nodes and combine them, until there's only
	one uncombined node left.

	Comments: None.
*/

HuffCode()

{
	register struct treeNode *treePtr;
	register struct treeNode *n1;
	struct treeNode *n2;

	if (nextNode == (tree+1)) {
		nextNode->terminal = TRUE;
		nextNode->marked = FALSE;
		nextNode->count = 0;
		nextNode->val.value = tree[0].val.value+1;
		nextNode++;
	};

	while (TRUE) {
		n1 = n2 = NIL;
		for (treePtr = tree; treePtr < nextNode; treePtr++) {
			if (treePtr->marked) continue;
			if (n1 == NIL) {
				if (n2 == NIL) n2 = treePtr;
				else if (n2->count <= treePtr->count) n1 = treePtr;
				else {
					n1 = n2; n2 = treePtr;
				};
			} else if (n1->count > treePtr->count) {
				if (n2->count > treePtr->count) {
					n1 = n2; n2 = treePtr;
				} else n1 = treePtr;
			};
		};
		if (n1 == NIL) {
			treeTop = n2;
			return;
		};
		n1->marked = n2->marked = TRUE;
		n1->parent = n2->parent = nextNode;
		nextNode->parent = NIL;
		nextNode->terminal = nextNode->marked = FALSE;
		nextNode->count = n1->count + n2->count;
		nextNode->val.br.left = n1;
		nextNode->val.br.right = n2;
		nextNode++;
	};
}

/*	HuffStat(preSz,tabSz,dataSz)

	Function: Return the size of the pre- and post-Huffman encoded data. preSz is the size before
	encoding, tabSz is the size of the Huffman tree table, and dataSz is the size of the encoded
	data, not counting the table. All sizes are given in bytes.

	Algorithm: preSz is the count in the top node, tabSz is one bit for every node, plus eight bits
	for every terminal node, and dataSz is found by traversing the tree and multiplying each
	terminal node count by the depth of that node. Each size is rounded to the nearest byte value.

	Comments: None.
*/

HuffStat(preSz,tabSz,dataSz)

long *preSz;
long *tabSz;
long *dataSz;

{
	register struct treeNode *treePtr;
	register long bCnt;
	long trav();

	*preSz = treeTop->count;

	for (treePtr = tree, bCnt = 0L; treePtr < nextNode; treePtr++) {
		bCnt++;
		if (treePtr->terminal) bCnt += 8;
	};
	*tabSz = (bCnt+7L) >> 3;

	*dataSz = (trav(treeTop,0)+7L) >> 3;
}

static long trav(treePtr,depth)

register struct treeNode *treePtr;
register int depth;

{
	if (treePtr->terminal) return(((long) depth)*treePtr->count);
	return(trav(treePtr->val.br.left,depth+1)+trav(treePtr->val.br.right,depth+1));
}

/*	HuffBTable(bs)

	Function: Generate a bit-stream table from the tree, put the table at bs, and return the size
	in bytes.

	Algorithm: Traverse the tree, outputing it as a bit-stream.

	Comments: The bitified table will be at most 320 bytes long.
*/

long HuffBTable(bs,sz,func)

char *bs;
long sz;
int (*func)();

{
	long BitSize();

	BitInit(bs,sz,func);
	bitTrav(treeTop);
	return((BitSize()+7L)/8L);
}

bitTrav(treePtr)

register struct treeNode *treePtr;

{
	if (treePtr->terminal) {
		BitEncode(1);
		ByteEncode(treePtr->val.value);
	} else {
		BitEncode(0);
		bitTrav(treePtr->val.br.left);
		bitTrav(treePtr->val.br.right);
	};
}

/*	HuffBData(blk,sz)

	Function: Huffman encode the block of data at blk, of size sz, into the currently open
	bit-stream.

	Algorithm: Find the terminal node with the right value, then follow the chain up the tree
	to the parent by recursive calls, then return up the chain while outputing the tree path.

	Comments: It's up to the caller to do a BitInit() prior to calling this routine.
*/

HuffBData(blk,sz)

char *blk;
long sz;

{
	register char *ptr;
	register long cnt;

	for (ptr = blk, cnt = sz; cnt--; ptr++) dataTrav(valToTree[((unsigned) *ptr) & 0xFF]);
}

dataTrav(treePtr)

struct treeNode *treePtr;

{
	register struct treeNode *pPtr;

	if ((pPtr = treePtr->parent) != NIL) {
		dataTrav(pPtr);
		if (pPtr->val.br.left == treePtr) BitEncode(0);
		else BitEncode(1);
	};
}

/*	HuffDTable()

	Function: Build the Huffman table from the current bit-stream.

	Algorithm: Read in the bit stream, and build nodes and terminals as indicated.

	Comments: None.
*/

HuffDTable()

{
	struct treeNode *travDTab();

	HuffInit();
	treeTop = travDTab();
}

struct treeNode *travDTab()

{
	register struct treeNode *tPtr;

	tPtr = nextNode++;
	if (BitDecode() == 1) {
		tPtr->terminal = TRUE;
		tPtr->val.value = ByteDecode();
	} else {
		tPtr->terminal = FALSE;
		tPtr->val.br.left = travDTab();
		tPtr->val.br.right = travDTab();
	};
	return(tPtr);
}

/*	HuffDData(blk,sz)

	Function: De-Huffmanize sz bytes from the bit-stream, into the area pointed to by blk.

	Algorithm: Traverse the tree according to the bit-stream, and get the decoded value.

	Comments: None.
*/

HuffDData(blk,sz)

char *blk;
long sz;

{
	register char *ptr;
	register long cnt;
	register struct treeNode *tPtr;

	ptr = blk; cnt = sz;
	while (cnt--) {
		tPtr = treeTop;
		while (! tPtr->terminal)
			if (BitDecode() == 0) tPtr = tPtr->val.br.left;
			else tPtr = tPtr->val.br.right;
		*ptr++ = tPtr->val.value;
	};
}

/*	Bit-stream Operations

	Function: Encode/decode bit-streams:
			BitInit(bs,sz,func) - initialize bit-stream encoding/decoding. bs is a pointer to the
						bit-stream, sz is the size of the bit-stream area, and func is a
						function to call when the bit-stream area fills up or empties.
			BitEncode(bit) - add the bit to the bit-stream.
			ByteEncode(byte) - add the byte to the bit-stream.
			BitDecode() - return the next bit from the bit-stream.
			ByteDecode() - return the next byte from the bit-stream.
			BlkDecode(blk,sz) - return a series of bytes from the bit-stream.
			ByteAllign() - skip forward to the next even byte boundary.
			BitSize() - return the current size of the bit-stream, in bits.
			BitShift(sz) - remove sz bytes from the start of the bit-stream, shift the remaining
						bytes down to the beginning.

	Algorithm: Simple...

	Comments: None.
*/

static char *bsStart;		/* Start of bit-stream block. */
static char *bsPtr;			/* Current bit-stream byte. */
static int bsCnt;			/* Number of bits left in current byte. */
static long bsSz;			/* Size of the bit-stream area. */
static int (*bsFunc)();		/* Function to call when bit-stream fill or empties. */

BitInit(bs,sz,func)

char *bs;
long sz;
int (*func)();

{
	bsPtr = (bsStart = bs) - 1;
	bsCnt = 0;
	bsSz = sz;
	bsFunc = func;
}

BitEncode(bit)

int bit;

{
	if (bsCnt == 0) {
		if (bsSz == 0L) {
			if (bsFunc != NIL) (*bsFunc)();
			if (bsSz == 0L) {
				bsSz++;
				bsPtr--;
			};
		};
		bsSz--;
		*(++bsPtr) = 0;
		bsCnt = 8;
	};
	*bsPtr |= (bit & 1) << (--bsCnt);
}

ByteEncode(byte)

register unsigned byte;

{
	BitEncode(byte >> 7);
	BitEncode(byte >> 6);
	BitEncode(byte >> 5);
	BitEncode(byte >> 4);
	BitEncode(byte >> 3);
	BitEncode(byte >> 2);
	BitEncode(byte >> 1);
	BitEncode(byte);
}

BitDecode()

{
	if (bsCnt == 0) {
		if (bsSz == 0L) {
			if (bsFunc != NIL) (*bsFunc)();
			if (bsSz == 0L) return(0);
		};
		bsSz--;
		bsPtr++;
		bsCnt = 8;
	};
	return((*bsPtr >> (--bsCnt)) & 1);
}

ByteDecode()

{
	register unsigned res;

	res = BitDecode() << 7;
	res |= BitDecode() << 6;
	res |= BitDecode() << 5;
	res |= BitDecode() << 4;
	res |= BitDecode() << 3;
	res |= BitDecode() << 2;
	res |= BitDecode() << 1;
	res |= BitDecode();
	return(res);
}

BlkDecode(blk,sz)

char *blk;
long sz;

{
	register char *ptr;
	register long cnt;
	register long trSz;

	ptr = blk; cnt = sz;
	if ((bsCnt & 7) != 0) while (cnt--) *ptr++ = ByteDecode();
	else {
		while (sz) {
			if (bsSz == 0L) {
				if (bsFunc != NIL) (*bsFunc)();
				if (bsSz == 0L) {
					while (cnt--) *ptr++ = 0;
					return;
				};
			};
			trSz = (sz <= bsSz) ? sz : bsSz;
			BlockMove(bsPtr+1,ptr,trSz);
			sz -= trSz;
			ptr += trSz;
			bsPtr += trSz;
			bsSz -= trSz;
		};
	};
}

ByteAllign()

{
	while ((bsCnt & 7) != 0) BitDecode();
}

long BitSize()

{
	return( ( ((long) bsPtr) - ((long) bsStart) )*8L + ((long) (8-bsCnt)) );
}

long ByteSize()

{
	return(((long) bsPtr - bsStart) + 1L);
}

long ByteDeSize()

{
	return((bsCnt != 0) ? (bsSz+1L) : bsSz);
}

BitShift(sz)

register long sz;

{
	BlockMove(bsStart+sz,bsStart,bsSz);
	bsPtr -= sz;
	bsSz += sz;
}

/*	printNode(node)

	Function: Print out the value of the node.

	Algorithm: Simple.

	Comments: None.
*/

#ifdef DEBUG

printNode(node)

struct treeNode *node;

{
	printf("%8lx",node);

	if (node->marked) printf("*");
	else printf("-");

	printf("[%6lu]",node->count);

	printf(" parent: %8lx;",node->parent);

	if (node->terminal) printf(" terminal: %u",node->val.value);
	else printf(" left: %8lx, right: %8lx",node->val.br.left,node->val.br.right);

	printf(".\n");
}

#endif DEBUG