#!/usr/bin/python """testing stuff for huffman compression, based on gumuz' and others this is public domain!""" import sys, types def huffwalk(*args): """we're using lists of lists... offset [-1] will either be: a list of length 3, which is a number and two branches, or a list of length 2, just a number and letter, or just a number, which means a list or a letter is at offset -2""" try: code, branch, codes = args except: return huffwalk('', huffmantree('test'), ()) if len(branch) == 0: return codes elif type(branch[-1]) == types.IntType: if type(branch[-2]) == types.StringType: return codes + tuple(branch) + (code,) else: # walk the left and right branches recursively return huffwalk(code + '0', branch[-2], codes) + \ huffwalk(code + '1', branch[-3], codes) else: # we should only get here on the first iteration if at all if len(code) > 0: raise Exception, 'lost! %s' % repr((branch, code, codes)) return huffwalk('', branch[-1], codes) def huffmantree(*args): if args[0] == 'test': # example at http://www.maths.abdn.ac.uk/~igc/tch/mx4002/notes/node59.html nodes = [['i', 6], ['a', 3], ['b', 3], ['d', 2], ['m', 4], ['e', 5], ['o', 3], ['c', 1], ['f', 1], ['g', 2], ['s', 4], ['t', 3], ['l', 2], ['r', 2], ['n', 5], ['p', 1], ['u', 2], [' ', 11]] else: nodes = wordfrequencies(args) while len(nodes) > 1: combined = [nodes.pop(0), nodes.pop(0)] # get the 2 lowest-weight nodes combined.append(combined[0][-1] + combined[1][-1]) # add the weights #DebugPrint('combined lowest nodes: ', combined) nodes.append(combined) # plop new node into the existing list nodes.sort(lambda a, b: cmp(a[-1], b[-1])) # re-sort for lowest weights return nodes if __name__ == "__main__": sys.stderr.write('running huffman test\n') print repr(huffwalk('test')) else: sys.stderr.write('loaded huffman module\n')