############################################################################ # # Name: huffcode.icn # # Title: huffman coding tools # # Author: Richard L. Goerwitz # # Version: 1.4 # ############################################################################ # # An odd assortment of tools that lets me compress text using an # Iconish version of a generic Huffman algorithm. See block_encode(). # ############################################################################ # # Links: outbits.icn inbits.icn # # See also: press.icn # ############################################################################ record node(l,r,n) record _N(l,r) record leaf(c,n) record hcode(c,i,len) # For debugging purposes. # link ximage procedure count_chars(s, char_tbl) # # Count chars in s, placing stats in char_tbl (keys = chars in # s, values = leaf records, with the counts for each chr in s # contained in char_tbl[chr].n). # local chr initial { /char_tbl & stop("count_chars: need 2 args - 1 string, 2 table") *char_tbl ~= 0 & stop("count_chars: start me with an empty table!") } s ? { while chr := move(1) do { /char_tbl[chr] := leaf(chr,0) char_tbl[chr].n +:= 1 } } # write(ximage(char_tbl)) return *char_tbl # for lack of anything better end procedure heap_init(char_tbl) # # Create heap data structure out of the table filled out by # successive calls to count_chars(s,t). The heap is just a # list. Naturally, it's size can be obtained via *heap. # local heap heap := list() every push(heap, !char_tbl) do { resettle_heap(heap, 1) # write(ximage(heap)) } return heap end procedure resettle_heap(h, k) # # Based loosely on Sedgewick (2nd. ed., 1988), p. 160. Take k-th # node on the heap, and walk down the heap, switching this node # along the way with the child whose value is the least AND whose # value is less than this node's. Stop when you find no children # whose value is less than that of the original node. Elements on # heap are records of type leaf, with the values contained in the # "n" field. # local j # While we haven't spilled off the end of the heap (the size of the # heap is *h; *h / 2 is the biggest k we need to look at)... while k <= (*h / 2) do { # ...double k, assign the result to j. j := k+k # If we aren't at the end of the heap... if j < *h then { # ...check to see which of h[k]'s children is the smallest, # and make j point to it. if h[j].n > h[j+1].n then # h[j] :=: h[j+1] j +:= 1 } # If the current parent (h[k]) has a value less than those of its # children, then break; we're done. if h[k].n <= h[j].n then break # Otherwise, switch the parent for the child, and loop around # again, with k (the pointer to the parent) now pointing to the # new offset of the element we have been working on. h[k] :=: h[j] k := j } return k end procedure heap_2_huffman_tree(h) # # Construct the Huffman tree out of heap h. Find the smallest # element, pop it off the heap, then reshuffle the heap. After # reshuffling, replace the top record on the stack with a node() # record whose n field equal to the sum of the n fields for the # element popped off the stack originally, and the one that is # now about to be replaced. Link the new node record to the 2 # elements on the heap it is now replacing. Reshuffle the heap # again, then repeat. You're done when the size of the heap is # 1. That one element remaining (h[1]) is your Huffman tree. # # Based loosely on Sedgewick (2nd ed., 1988), p. 328-9. # local frst, scnd, count until *h = 1 do { h[1] :=: h[*h] # Reverse first and last elements. frst := pull(h) # Pop last elem off & save it. resettle_heap(h, 1) # Resettle the heap. scnd := !h # Save (but don't clobber) top element. count := frst.n + scnd.n frst := { if *frst = 2 then frst.c else _N(frst.l, frst.r) } scnd := { if *scnd = 2 then scnd.c else _N(scnd.l, scnd.r) } h[1] := node(frst, scnd, count) # Create new node(). resettle_heap(h, 1) # Resettle once again. } # H is no longer a stack. It's single element - the root of a # Huffman tree made up of node()s and leaf()s. Put the l and r # fields of that element into an _N record, and return the new # record. return _N(h[1].l, h[1].r) end procedure hash_huffcodes(tr) # # Hash Huffman codes. Tr (arg 1) is a Huffman tree created by # heap_2_huffman_tree(heap). Output is a table, with the keys # representing characters, and the values being records of type # hcode(i,len), where i is the Huffcode (an integer) and len is # the number of bits it occupies. # local code, huffman_table huffman_table := table() every code := build_codes(tr) do insert(huffman_table, code.c, code) return huffman_table end procedure build_codes(tr, i, len) # # Decompose Huffman tree tr into hcode() records which contain # 3 fields: c (the character encoded), i (its integer code), # and len (the number of bytes the integer code occupies). Sus- # pend one such record for each character encoded in tree tr. # if type(tr) == "string" then return hcode(tr, i, len) else { (/len := 1) | (len +:= 1) (/i := 0) | (i *:= 2) suspend build_codes(tr.l, i, len) i +:= 1 suspend build_codes(tr.r, i, len) } end procedure block_encode(s, huffman_table) # # Write to file f string s encoded using huffman_table (a table having # chars as keys and huffman codes as values). # # Create huffman_table as follows (char_tbl is a table, with chars as # keys and frequencies as values): # # heap := heap_init(char_tbl) # hufftree := heap_2_huffman_tree(heap) # huffman_table := hash_huffcodes(hufftree) # # Store the tree, hufftree. Pass the huffman table to block_encode as # its second argument. local s2, size, hcode_4_chr, chr *s > 2r1111111111111111 & stop("write_string: too many characters in s") s2 := "" # initialize size string outbits() # just in case every s2 ||:= outbits(*s, 16) # block size = 2 bytes s ? { while chr := move(1) do { hcode_4_chr := \huffman_table[chr] | stop("write_string: unexpected char, ",image(chr)) every s2 ||:= outbits(hcode_4_chr.i, hcode_4_chr.len) } s2 ||:= outbits() } return s2 end procedure block_decode(f, huff_tree) # Undo what block_encode does. local how_many, s2, E, chr, bit s2 := "" # The first two bytes record how many characters the original # text had in it. If the read fails, it means that the file # system filled up while making the index, and the bitmaps now # can't be located in f. how_many := ishift(ord(reads(f)), 8) + ord(reads(f)) | stop("block_decode: failure reading ",image(f)) # If the original text was blank (zero characters), then return # an empty string. if how_many = 0 then { return "" } reads(f, how_many) ? { # Otherwise, set E = to the top node of the Huffman tree, and # begin decoding. E := huff_tree while chr := move(1) do { every bit := iand(1, ishift(ord(chr), -7 to 0)) do { E := { if bit = 0 then E.l else E.r } if s2 ||:= string(E) then { if *s2 = how_many then return s2 else E := huff_tree } } } } # If we get to here, something is quite amiss! stop("read_string: bad character count") end