Huffman coding

You are encouraged to solve this task according to the task description, using any language you may know.

Huffman encoding is a way to assign binary codes to symbols that reduces the overall number of bits used to encode a typical string of those symbols.

For example, if you use letters as symbols and have details of the frequency of occurrence of those letters in typical strings, then you could just encode each letter with a fixed number of bits, such as in ASCII codes. You can do better than this by encoding more frequently occurring letters such as e and a, with smaller bit strings; and less frequently occurring letters such as q and x with longer bit strings.

Any string of letters will be encoded as a string of bits that are no longer of the same length per letter. To successfully decode such a string, the smaller codes assigned to letters such as 'e' cannot occur as a prefix in the larger codes such as that for 'x'.

If you were to assign a code 01 for 'e' and code 011 for 'x', then if the bits to decode started as 011... then you would not know if you should decode an 'e' or an 'x'.

The Huffman coding scheme takes each symbol and its weight (or frequency of occurrence), and generates proper encodings for each symbol taking account of the weights of each symbol, so that higher weighted symbols have fewer bits in their encoding. (See the WP article for more information).

A Huffman encoding can be computed by first creating a tree of nodes:

Create a leaf node for each symbol and add it to the priority queue. While there is more than one node in the queue: Remove the node of highest priority (lowest probability) twice to get two nodes. Create a new internal node with these two nodes as children and with probability equal to the sum of the two nodes' probabilities. Add the new node to the queue. The remaining node is the root node and the tree is complete.



Traverse the constructed binary tree from root to leaves assigning and accumulating a '0' for one branch and a '1' for the other at each node. The accumulated zeros and ones at each leaf constitute a Huffman encoding for those symbols and weights:





Task

Using the characters and their frequency from the string:

this is an example for huffman encoding

create a program to generate a Huffman encoding for each character as a table.





Works with: Ada 2005

huffman.ads:

with Ada. Containers . Indefinite_Ordered_Maps ;

with Ada. Containers . Ordered_Maps ;

with Ada. Finalization ;

generic

type Symbol_Type is private ;

with function "<" ( Left, Right : Symbol_Type ) return Boolean is <>;

with procedure Put ( Item : Symbol_Type ) ;

type Symbol_Sequence is array ( Positive range <> ) of Symbol_Type;

type Frequency_Type is private ;

with function "+" ( Left, Right : Frequency_Type ) return Frequency_Type

is <>;

with function "<" ( Left, Right : Frequency_Type ) return Boolean is <>;

package Huffman is

-- bits = booleans (true/false = 1/0)

type Bit_Sequence is array ( Positive range <> ) of Boolean;

Zero_Sequence : constant Bit_Sequence ( 1 .. 0 ) := ( others => False ) ;

-- output the sequence

procedure Put ( Code : Bit_Sequence ) ;



-- type for frequency map

package Frequency_Maps is new Ada. Containers . Ordered_Maps

( Element_Type => Frequency_Type,

Key_Type => Symbol_Type ) ;



type Huffman_Tree is private ;

-- create a huffman tree from frequency map

procedure Create_Tree

( Tree : out Huffman_Tree;

Frequencies : Frequency_Maps. Map ) ;

-- encode a single symbol

function Encode

( Tree : Huffman_Tree;

Symbol : Symbol_Type )

return Bit_Sequence;

-- encode a symbol sequence

function Encode

( Tree : Huffman_Tree;

Symbols : Symbol_Sequence )

return Bit_Sequence;

-- decode a bit sequence

function Decode

( Tree : Huffman_Tree;

Code : Bit_Sequence )

return Symbol_Sequence;

-- dump the encoding table

procedure Dump_Encoding ( Tree : Huffman_Tree ) ;

private

-- type for encoding map

package Encoding_Maps is new Ada. Containers . Indefinite_Ordered_Maps

( Element_Type => Bit_Sequence,

Key_Type => Symbol_Type ) ;



type Huffman_Node;

type Node_Access is access Huffman_Node;

-- a node is either internal (left_child/right_child used)

-- or a leaf (left_child/right_child are null)

type Huffman_Node is record

Frequency : Frequency_Type;

Left_Child : Node_Access := null ;

Right_Child : Node_Access := null ;

Symbol : Symbol_Type;

end record ;

-- create a leaf node

function Create_Node

( Symbol : Symbol_Type;

Frequency : Frequency_Type )

return Node_Access;

-- create an internal node

function Create_Node ( Left, Right : Node_Access ) return Node_Access;

-- fill the encoding map

procedure Fill

( The_Node : Node_Access;

Map : in out Encoding_Maps. Map ;

Prefix : Bit_Sequence ) ;



-- huffman tree has a tree and an encoding map

type Huffman_Tree is new Ada. Finalization . Controlled with record

Tree : Node_Access := null ;

Map : Encoding_Maps. Map := Encoding_Maps. Empty_Map ;

end record ;

-- free memory after finalization

overriding procedure Finalize ( Object : in out Huffman_Tree ) ;

end Huffman;

huffman.adb:

with Ada. Text_IO ;

with Ada. Unchecked_Deallocation ;

with Ada. Containers . Vectors ;

package body Huffman is

package Node_Vectors is new Ada. Containers . Vectors

( Element_Type => Node_Access,

Index_Type => Positive ) ;



function "<" ( Left, Right : Node_Access ) return Boolean is

begin

-- compare frequency

if Left. Frequency < Right. Frequency then

return True;

elsif Right. Frequency < Left. Frequency then

return False;

end if ;

-- same frequency, choose leaf node

if Left. Left_Child = null and then Right. Left_Child /= null then

return True;

elsif Left. Left_Child /= null and then Right. Left_Child = null then

return False;

end if ;

-- same frequency, same node type (internal/leaf)

if Left. Left_Child /= null then

-- for internal nodes, compare left children, then right children

if Left. Left_Child < Right. Left_Child then

return True;

elsif Right. Left_Child < Left. Left_Child then

return False;

else

return Left. Right_Child < Right. Right_Child ;

end if ;

else

-- for leaf nodes, compare symbol

return Left. Symbol < Right. Symbol ;

end if ;

end "<" ;

package Node_Vector_Sort is new Node_Vectors. Generic_Sorting ;



-- Build the Huffman tree for the given symbol frequencies and fill the
-- symbol -> bit sequence map of Tree.
--
-- Tree        : receives the new tree and encoding map.
-- Frequencies : weight of every symbol that may be encoded.
--
-- An empty Frequencies map yields an empty tree (null root, empty map).
procedure Create_Tree
  (Tree        : out Huffman_Tree;
   Frequencies : Frequency_Maps.Map)
is
   Node_Queue : Node_Vectors.Vector := Node_Vectors.Empty_Vector;
begin
   -- Huffman_Tree is a tagged type, so the actual object is passed by
   -- reference and may still carry the result of an earlier call.
   -- Release it first: otherwise the old nodes leak and Fill would try
   -- to insert symbols that are already present in the map.
   if Tree.Tree /= null then
      Finalize (Tree);
      Tree.Tree := null;
   end if;
   Tree.Map.Clear;

   -- one leaf node per symbol
   declare
      use Frequency_Maps;
      Position : Cursor := Frequencies.First;
   begin
      while Position /= No_Element loop
         Node_Queue.Append
           (Create_Node
              (Symbol    => Key (Position),
               Frequency => Element (Position)));
         Next (Position);
      end loop;
   end;

   -- the vector is used as a priority queue: smallest node first (see "<")
   Node_Vector_Sort.Sort (Node_Queue);

   while not Node_Queue.Is_Empty loop
      declare
         First : constant Node_Access := Node_Queue.First_Element;
      begin
         Node_Queue.Delete_First;
         if Node_Queue.Is_Empty then
            -- last remaining node is the root
            Tree.Tree := First;
         else
            -- merge the two smallest nodes into a new internal node
            declare
               Second : constant Node_Access := Node_Queue.First_Element;
            begin
               Node_Queue.Delete_First;
               Node_Queue.Append (Create_Node (First, Second));
            end;
            -- restore queue order after appending the merged node
            Node_Vector_Sort.Sort (Node_Queue);
         end if;
      end;
   end loop;

   -- Fill dereferences its node argument, so skip it when no symbol
   -- was supplied and the root is still null.
   if Tree.Tree /= null then
      Fill (The_Node => Tree.Tree, Map => Tree.Map, Prefix => Zero_Sequence);
   end if;
end Create_Tree;



-- create leaf node

function Create_Node

( Symbol : Symbol_Type;

Frequency : Frequency_Type )

return Node_Access

is

Result : Node_Access := new Huffman_Node;

begin

Result. Frequency := Frequency;

Result. Symbol := Symbol;

return Result;

end Create_Node;



-- create internal node

function Create_Node ( Left, Right : Node_Access ) return Node_Access is

Result : Node_Access := new Huffman_Node;

begin

Result. Frequency := Left. Frequency + Right. Frequency ;

Result. Left_Child := Left;

Result. Right_Child := Right;

return Result;

end Create_Node;



-- fill encoding map

procedure Fill

( The_Node : Node_Access;

Map : in out Encoding_Maps. Map ;

Prefix : Bit_Sequence ) is

begin

if The_Node. Left_Child /= null then

-- append false (0) for left child

Fill ( The_Node. Left_Child , Map, Prefix & False ) ;

-- append true (1) for right child

Fill ( The_Node. Right_Child , Map, Prefix & True ) ;

else

-- leaf node reached, prefix = code for symbol

Map. Insert ( The_Node. Symbol , Prefix ) ;

end if ;

end Fill;



-- free memory after finalization

-- Release every node of the tree owned by Object.
--
-- Safe to call on a tree that was never built and safe to call more than
-- once: a null root (or null child) is simply skipped, and Object.Tree is
-- null afterwards because Free resets the access value it is given.
--
-- NOTE(review): Huffman_Tree derives from Controlled and no Adjust
-- override is visible in this package, so assigning one Huffman_Tree to
-- another would make both share (and later both free) the same nodes.
overriding procedure Finalize (Object : in out Huffman_Tree) is

   procedure Free is new Ada.Unchecked_Deallocation
     (Name   => Node_Access,
      Object => Huffman_Node);

   -- free the subtree rooted at The_Node, children first
   procedure Recursive_Free (The_Node : in out Node_Access) is
   begin
      if The_Node = null then
         return;
      end if;
      -- leaves have null children, which the guard above handles
      Recursive_Free (The_Node.Left_Child);
      Recursive_Free (The_Node.Right_Child);
      Free (The_Node);
   end Recursive_Free;

begin
   Recursive_Free (Object.Tree);
end Finalize;



-- encode single symbol

function Encode

( Tree : Huffman_Tree;

Symbol : Symbol_Type )

return Bit_Sequence

is

begin

-- simply lookup in map

return Tree. Map . Element ( Symbol ) ;

end Encode;



-- encode symbol sequence

-- Encode a whole symbol sequence by concatenating the codes of its symbols.
--
-- Tree    : tree whose encoding map is used for the lookup.
-- Symbols : symbols to encode; may be empty.
--
-- Returns the concatenated bit sequence; an empty input yields the empty
-- sequence instead of indexing past the bounds of Symbols.
function Encode
  (Tree    : Huffman_Tree;
   Symbols : Symbol_Sequence)
   return Bit_Sequence
is
begin
   if Symbols'Length = 0 then
      -- nothing to encode
      return Zero_Sequence;
   elsif Symbols'Length = 1 then
      -- single symbol: plain map lookup
      return Encode (Tree, Symbols (Symbols'First));
   else
      -- code of the first symbol followed by the code of the rest
      return Encode (Tree, Symbols (Symbols'First)) &
             Encode (Tree, Symbols (Symbols'First + 1 .. Symbols'Last));
   end if;
end Encode;



-- decode a bit sequence

function Decode

( Tree : Huffman_Tree;

Code : Bit_Sequence )

return Symbol_Sequence

is

-- maximum length = code length

Result : Symbol_Sequence ( 1 .. Code 'Length ) ;

-- last used index of result

Last : Natural := 0 ;

The_Node : Node_Access := Tree. Tree ;

begin

-- iterate over the code

for I in Code' Range loop

-- if current element is true, descent the right branch

if Code ( I ) then

The_Node := The_Node. Right_Child ;

else

-- false: descend left branch

The_Node := The_Node. Left_Child ;

end if ;

if The_Node. Left_Child = null then

-- reached leaf node: append symbol to result

Last := Last + 1 ;

Result ( Last ) := The_Node. Symbol ;

-- reset current node to root

The_Node := Tree. Tree ;

end if ;

end loop ;

-- return subset of result array

return Result ( 1 .. Last ) ;

end Decode;



-- output a bit sequence

procedure Put ( Code : Bit_Sequence ) is

package Int_IO is new Ada. Text_IO . Integer_IO ( Integer ) ;

begin

for I in Code' Range loop

if Code ( I ) then

-- true = 1

Int_IO. Put ( 1 , 0 ) ;

else

-- false = 0

Int_IO. Put ( 0 , 0 ) ;

end if ;

end loop ;

Ada. Text_IO . New_Line ;

end Put;



-- dump encoding map

procedure Dump_Encoding ( Tree : Huffman_Tree ) is

use type Encoding_Maps. Cursor ;

Position : Encoding_Maps. Cursor := Tree. Map . First ;

begin

-- iterate map

while Position /= Encoding_Maps. No_Element loop

-- key

Put ( Encoding_Maps. Key ( Position ) ) ;

Ada. Text_IO . Put ( " = " ) ;

-- code

Put ( Encoding_Maps. Element ( Position ) ) ;

Encoding_Maps. Next ( Position ) ;

end loop ;

end Dump_Encoding;

end Huffman;

example main.adb:

with Ada. Text_IO ;

with Huffman;

procedure Main is

package Char_Natural_Huffman_Tree is new Huffman

( Symbol_Type => Character,

Put => Ada. Text_IO . Put ,

Symbol_Sequence => String,

Frequency_Type => Natural ) ;

Tree : Char_Natural_Huffman_Tree. Huffman_Tree ;

Frequencies : Char_Natural_Huffman_Tree. Frequency_Maps . Map ;

Input_String : constant String :=

"this is an example for huffman encoding" ;

begin

-- build frequency map

for I in Input_String' Range loop

declare

use Char_Natural_Huffman_Tree. Frequency_Maps ;

Position : constant Cursor := Frequencies. Find ( Input_String ( I ) ) ;

begin

if Position = No_Element then

Frequencies. Insert ( Key => Input_String ( I ) , New_Item => 1 ) ;

else

Frequencies. Replace_Element

( Position => Position,

New_Item => Element ( Position ) + 1 ) ;

end if ;

end ;

end loop ;



-- create huffman tree

Char_Natural_Huffman_Tree. Create_Tree

( Tree => Tree,

Frequencies => Frequencies ) ;



-- dump encodings

Char_Natural_Huffman_Tree. Dump_Encoding ( Tree => Tree ) ;



-- encode example string

declare

Code : constant Char_Natural_Huffman_Tree. Bit_Sequence :=

Char_Natural_Huffman_Tree. Encode

( Tree => Tree,

Symbols => Input_String ) ;

begin

Char_Natural_Huffman_Tree. Put ( Code ) ;

Ada. Text_IO . Put_Line

( Char_Natural_Huffman_Tree. Decode ( Tree => Tree, Code => Code ) ) ;

end ;

end Main;

Output:

= 101 a = 1001 c = 01010 d = 01011 e = 1100 f = 1101 g = 01100 h = 11111 i = 1110 l = 01101 m = 0010 n = 000 o = 0011 p = 01110 r = 01111 s = 0100 t = 10000 u = 10001 x = 11110 1000011111111001001011110010010110010001011100111101001001001110011011100101110100110111110111111100011101110100101001000101110000001010001101011111000001100 this is an example for huffman encoding

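The BBC BASIC example below is incorrect. Please fix the code and remove this message. Details: a Huffman code cannot contain another code as a prefix, but in the output shown for that example the code 101 (space) is a prefix of the code 1011 (i).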

Works with: BBC BASIC for Windows

INSTALL @lib$+"SORTSALIB"
SortDn% = FN_sortSAinit(1,0) : REM Descending

Text$ = "this is an example for huffman encoding"

REM One record per tree node: ch& = symbol (0 for internal nodes),
REM num% = weight, lkl%/lkr% = index of the parent node when this node is
REM its left/right child (0 = no parent yet).
DIM tree{(127) ch&, num%, lkl%, lkr%}
FOR i% = 1 TO LEN(Text$)
  c% = ASCMID$(Text$,i%)
  tree{(c%)}.ch& = c%
  tree{(c%)}.num% += 1
NEXT

REM Sort once so that the used symbols occupy indices 0..size%-1.
REM C% is set before the CALL as in the original code; presumably it is
REM the element count read by the SORTSALIB routine.
C% = DIM(tree{()},1) + 1
CALL SortDn%, tree{()}, tree{(0)}.num%
FOR i% = 0 TO DIM(tree{()},1)
  IF tree{(i%)}.num% = 0 EXIT FOR
NEXT
size% = i%

REM Build the tree. The array is NOT re-sorted while linking: the links
REM are array indices, so moving records around would make them point at
REM the wrong parents (which is what produced non-prefix-free codes).
REM Every node except the root ends up linked, hence the loop condition.
linked% = 0
WHILE linked% < size% - 1
  i% = FNlightest(size%)
  tree{(i%)}.lkl% = size%
  j% = FNlightest(size%)
  tree{(j%)}.lkr% = size%
  linked% += 2
  tree{(size%)}.num% = tree{(i%)}.num% + tree{(j%)}.num%
  size% += 1
ENDWHILE

REM Print the code of every leaf by walking up to the root and
REM prepending one bit per step.
FOR i% = size% - 1 TO 0 STEP -1
  IF tree{(i%)}.ch& THEN
    h$ = ""
    j% = i%
    REPEAT
      CASE TRUE OF
        WHEN tree{(j%)}.lkl% <> 0:
          h$ = "0" + h$
          j% = tree{(j%)}.lkl%
        WHEN tree{(j%)}.lkr% <> 0:
          h$ = "1" + h$
          j% = tree{(j%)}.lkr%
        OTHERWISE:
          EXIT REPEAT
      ENDCASE
    UNTIL FALSE
    VDU tree{(i%)}.ch& : PRINT " " h$
  ENDIF
NEXT
END

REM Return the index (0..n%-1) of the lightest node that has no parent
REM yet, or -1 if there is none.
DEF FNlightest(n%)
LOCAL k%, best%, pick%
best% = &7FFFFFFF : pick% = -1
FOR k% = 0 TO n% - 1
  IF tree{(k%)}.lkl% = 0 AND tree{(k%)}.lkr% = 0 AND tree{(k%)}.num% < best% THEN
    pick% = k% : best% = tree{(k%)}.num%
  ENDIF
NEXT
= pick%

Output:

101 n 000 e 1110 f 1101 a 1100 i 1011 s 0110 m 0101 h 0100 o 0011 c 0010 l 0001 r 0000 x 11111 p 11110 d 11101 u 11100 g 11011 t 11010

( "this is an example for huffman encoding":?S

& 0:?chars

& 0:?p

& ( @( !S

: ?

( [!p %?char [?p ?

& !char+!chars:?chars

& ~

)

)

|

)

& 0:?prioritized

& whl

' ( !chars:?n*%@?w+?chars

& (!n.!w)+!prioritized:?prioritized

)

& whl

' ( !prioritized:(?p.?x)+(?q.?y)+?nprioritized

& (!p+!q.(!p.0,!x)+(!q.1,!y))+!nprioritized:?prioritized

)

& 0:?L

& ( walk

= bits tree bit subtree

. !arg:(?bits.?tree)

& whl

' ( !tree:(?p.?bit,?subtree)+?tree

& ( !subtree:@

& (!subtree.str$(!bits !bit))+!L:?L

| walk$(!bits !bit.!subtree)

)

)

)

& !prioritized:(?.?prioritized)

& walk$(.!prioritized)

& lst$L

& :?encoded

& 0:?p

& ( @( !S

: ?

( [!p %?char [?p ?

& !L:?+(!char.?code)+?

& !encoded !code:?encoded

& ~

)

)

| out$(str$!encoded)

)

& ( decode

= char bits

. !L

: ?+(?char.?bits&@(!arg:!bits ?arg))+?

& !char decode$!arg

| !arg

)

& out$("decoded:" str$(decode$(str$!encoded)));

Output:

(L= (" ".101) + (a.1001) + (c.01010) + (d.01011) + (e.1100) + (f.1101) + (g.01100) + (h.11111) + (i.1110) + (l.01101) + (m.0010) + (n.000) + (o.0011) + (p.01110) + (r.01111) + (s.0100) + (t.10000) + (u.10001) + (x.11110)); 1000011111111001001011110010010110010001011100111101001001001110011011100101110100110111110111111100011101110100101001000101110000001010001101011111000001100 decoded: this is an example for huffman encoding

This code lacks many of the checks it needs, especially for memory allocation failures.

#include <stdio.h>

#include <stdlib.h>

#include <string.h>



#define BYTES 256



struct huffcode {

int nbits ;

int code ;

} ;

typedef struct huffcode huffcode_t ;



struct huffheap {

int * h ;

int n , s , cs ;

long * f ;

} ;

typedef struct huffheap heap_t ;



/* heap handling funcs */

/*
 * Allocate an empty heap with room for s node indices.
 *
 * s: initial capacity (also used as the growth step, cs).
 * f: frequency table the heap orders its entries by; not copied, so it
 *    must outlive the heap.
 *
 * Returns the new heap, or NULL if either allocation fails.
 */
static heap_t *_heap_create(int s, long *f)
{
    heap_t *h = malloc(sizeof *h);

    if (h == NULL) return NULL;

    h->h = malloc(sizeof(int) * s);
    if (h->h == NULL) {
        /* do not leak the header when the index array cannot be allocated */
        free(h);
        return NULL;
    }
    h->s = h->cs = s;
    h->n = 0;
    h->f = f;
    return h;
}



static void _heap_destroy ( heap_t * heap )

{

free ( heap -> h ) ;

free ( heap ) ;

}



#define swap_(I,J) do { int t_; t_ = a[(I)]; \

a[(I)] = a[(J)]; a[(J)] = t_; } while(0)

static void _heap_sort ( heap_t * heap )

{

int i = 1 , j = 2 ; /* gnome sort */

int * a = heap -> h ;



while ( i < heap -> n ) { /* smaller values are kept at the end */

if ( heap -> f [ a [ i - 1 ] ] >= heap -> f [ a [ i ] ] ) {

i = j ; j ++;

} else {

swap_ ( i - 1 , i ) ;

i --;

i = ( i == 0 ) ? j ++ : i ;

}

}

}

#undef swap_



/*
 * Add node index c to the heap and restore the ordering.
 *
 * The index array grows by heap->cs entries when it is full. The size
 * passed to realloc is in bytes, so the element count is multiplied by
 * sizeof(int). If the array cannot be grown the program is aborted,
 * because silently dropping a node would produce a wrong code table.
 */
static void _heap_add(heap_t *heap, int c)
{
    if ((heap->n + 1) > heap->s) {
        int *grown = realloc(heap->h, sizeof(int) * (heap->s + heap->cs));

        if (grown == NULL) {
            perror("realloc");
            abort();
        }
        heap->h = grown;
        heap->s += heap->cs;
    }
    heap->h[heap->n] = c;
    heap->n++;
    _heap_sort(heap);
}



/*
 * Take the entry stored last in the heap's index array.
 * _heap_sort keeps the smaller frequencies at the end of that array.
 *
 * Returns the removed node index, or -1 when the heap is empty.
 */
static int _heap_remove(heap_t *heap)
{
    if (heap->n <= 0)
        return -1;

    heap->n -= 1;
    return heap->h[heap->n];
}



/* huffmann code generator */

/*
 * Huffman code generator.
 *
 * freqs: table of BYTES frequencies, one per byte value; 0 = unused.
 *
 * Returns a malloc'ed table of BYTES pointers: NULL for unused byte
 * values, otherwise a huffcode_t holding the code bits and their count.
 * Release it with free_huffman_codes(). Returns NULL when memory cannot
 * be allocated.
 *
 * NOTE(review): the code bits are packed into an int, so codes longer
 * than the bit width of int cannot be represented; this is not checked.
 */
huffcode_t **create_huffman_codes(long *freqs)
{
    huffcode_t **codes;
    heap_t *heap;
    long efreqs[BYTES * 2];   /* leaves [0, BYTES), internal nodes above */
    int preds[BYTES * 2];     /* parent index; sign selects the branch */
    int i, extf = BYTES;      /* extf: next free internal node slot */
    int r1, r2;
    int bc, bn, ix;

    memcpy(efreqs, freqs, sizeof(long) * BYTES);
    memset(&efreqs[BYTES], 0, sizeof(long) * BYTES);

    heap = _heap_create(BYTES * 2, efreqs);
    if (heap == NULL) return NULL;

    for (i = 0; i < BYTES; i++) if (efreqs[i] > 0) _heap_add(heap, i);

    /* repeatedly merge the two least frequent nodes */
    while (heap->n > 1)
    {
        r1 = _heap_remove(heap);
        r2 = _heap_remove(heap);
        efreqs[extf] = efreqs[r1] + efreqs[r2];
        _heap_add(heap, extf);
        preds[r1] = extf;
        preds[r2] = -extf;
        extf++;
    }
    /* the remaining node is the root and is marked as its own parent;
       with an all-zero table the heap is empty and -1 comes back, which
       must not be used as an index */
    r1 = _heap_remove(heap);
    if (r1 >= 0) preds[r1] = r1;
    _heap_destroy(heap);

    codes = malloc(sizeof(huffcode_t *) * BYTES);
    if (codes == NULL) return NULL;

    for (i = 0; i < BYTES; i++) {
        bc = 0; bn = 0;
        if (efreqs[i] == 0) { codes[i] = NULL; continue; }
        /* walk from the leaf up to the root, collecting one bit per step */
        ix = i;
        while (abs(preds[ix]) != ix) {
            bc |= ((preds[ix] >= 0) ? 1 : 0) << bn;
            ix = abs(preds[ix]);
            bn++;
        }
        codes[i] = malloc(sizeof(huffcode_t));
        if (codes[i] == NULL) {
            /* undo the partial table so nothing leaks */
            while (i-- > 0) free(codes[i]);
            free(codes);
            return NULL;
        }
        codes[i]->nbits = bn;
        codes[i]->code = bc;
    }
    return codes;
}



void free_huffman_codes ( huffcode_t ** c )

{

int i ;



for ( i = 0 ; i < BYTES ; i ++ ) free ( c [ i ] ) ;

free ( c ) ;

}



#define MAXBITSPERCODE 100



/*
 * Write the lowest n bits of c into s as '0'/'1' characters, most
 * significant of those bits first, followed by a terminating NUL.
 *
 * c: value holding the bits.
 * n: number of bits to write; s must have room for n + 1 characters.
 *
 * The value is handled as unsigned so that a set sign bit still yields
 * '1' (the remainder of a negative int divided by 2 would be -1).
 */
void inttobits(int c, int n, char *s)
{
    unsigned int bits = (unsigned int)c;

    s[n] = 0;
    while (n > 0) {
        s[n - 1] = (char)('0' + (bits & 1u));
        bits >>= 1; n--;
    }
}



const char * test = "this is an example for huffman encoding" ;



/*
 * Count the byte frequencies of the sample string, build the Huffman
 * codes and print one line per used byte: character, code value, bits.
 * Returns 1 if the code table could not be built.
 */
int main()
{
    huffcode_t **r;
    int i;
    char strbit[MAXBITSPERCODE];
    const char *p;
    long freqs[BYTES];

    memset(freqs, 0, sizeof freqs);

    /* index through unsigned char: plain char may be negative */
    for (p = test; *p != '\0'; p++) freqs[(unsigned char)*p]++;

    r = create_huffman_codes(freqs);
    if (r == NULL) return 1;

    for (i = 0; i < BYTES; i++) {
        if (r[i] != NULL) {
            inttobits(r[i]->code, r[i]->nbits, strbit);
            printf("%c (%d) %s\n", i, r[i]->code, strbit);
        }
    }

    free_huffman_codes(r);

    return 0;
}

Alternative [ edit ]

Using a simple heap-based priority queue. The heap is an array, while the node tree is built with binary links.

#include <stdio.h>

#include <string.h>



typedef struct node_t {

struct node_t * left , * right ;

int freq ;

char c ;

} * node ;



struct node_t pool [ 256 ] = { { 0 } } ;

node qqq [ 255 ] , * q = qqq - 1 ;

int n_nodes = 0 , qend = 1 ;

char * code [ 128 ] = { 0 } , buf [ 1024 ] ;



node new_node ( int freq , char c , node a , node b )

{

node n = pool + n_nodes ++;

if ( freq ) n -> c = c , n -> freq = freq ;

else {

n -> left = a , n -> right = b ;

n -> freq = a -> freq + b -> freq ;

}

return n ;

}



/* priority queue */

void qinsert ( node n )

{

int j , i = qend ++;

while ( ( j = i / 2 ) ) {

if ( q [ j ] -> freq <= n -> freq ) break ;

q [ i ] = q [ j ] , i = j ;

}

q [ i ] = n ;

}



node qremove ( )

{

int i , l ;

node n = q [ i = 1 ] ;



if ( qend < 2 ) return 0 ;

qend --;

while ( ( l = i * 2 ) < qend ) {

if ( l + 1 < qend && q [ l + 1 ] -> freq < q [ l ] -> freq ) l ++;

q [ i ] = q [ l ] , i = l ;

}

q [ i ] = q [ qend ] ;

return n ;

}



/* walk the tree and put 0s and 1s */

void build_code ( node n , char * s , int len )

{

static char * out = buf ;

if ( n -> c ) {

s [ len ] = 0 ;

strcpy ( out , s ) ;

code [ n -> c ] = out ;

out += len + 1 ;

return ;

}



s [ len ] = '0' ; build_code ( n -> left , s , len + 1 ) ;

s [ len ] = '1' ; build_code ( n -> right , s , len + 1 ) ;

}



void init ( const char * s )

{

int i , freq [ 128 ] = { 0 } ;

char c [ 16 ] ;



while ( * s ) freq [ ( int ) * s ++ ] ++;



for ( i = 0 ; i < 128 ; i ++ )

if ( freq [ i ] ) qinsert ( new_node ( freq [ i ] , i , 0 , 0 ) ) ;



while ( qend > 2 )

qinsert ( new_node ( 0 , 0 , qremove ( ) , qremove ( ) ) ) ;



build_code ( q [ 1 ] , c , 0 ) ;

}



/*
 * Write the concatenated codes of the characters of s to out as a
 * NUL-terminated string of '0'/'1' characters.
 *
 * out must be large enough for the whole result. Characters without an
 * entry in the code table (including byte values of 128 and above) are
 * skipped. An empty s yields an empty string.
 */
void encode(const char *s, char *out)
{
    const char *bits;
    size_t len;
    unsigned char ch;

    *out = '\0';
    for (; *s; s++) {
        ch = (unsigned char)*s;     /* plain char may be negative */
        bits = ch < 128 ? code[ch] : NULL;
        if (bits == NULL) continue;
        len = strlen(bits);
        memcpy(out, bits, len + 1); /* copies the terminator as well */
        out += len;
    }
}



/*
 * Decode the '0'/'1' string s with the tree rooted at t and print the
 * resulting characters followed by a newline.
 *
 * '0' follows the left link, anything else the right link. A node with
 * a non-zero c is treated as a leaf. If the input ends in the middle of
 * a code, "garbage input" is printed.
 */
void decode(const char *s, node t)
{
    node n = t;
    while (*s) {
        if (*s++ == '0') n = n->left;
        else n = n->right;

        /* leaf reached: emit its character and restart at the root */
        if (n->c) putchar(n->c), n = t;
    }

    putchar('\n');
    if (t != n) printf("garbage input\n");
}



int main ( void )

{

int i ;

const char * str = "this is an example for huffman encoding" ;

char buf [ 1024 ] ;



init ( str ) ;

for ( i = 0 ; i < 128 ; i ++ )

if ( code [ i ] ) printf ( "'%c': %s

" , i , code [ i ] ) ;



encode ( str , buf ) ;

printf ( "encoded: %s

" , buf ) ;



printf ( "decoded: " ) ;

decode ( buf , q [ 1 ] ) ;



return 0 ;

}

Output:

' ': 000 'a': 1000 'c': 01101 'd': 01100 'e': 0101 'f': 0010 'g': 010000 'h': 1101 'i': 0011 'l': 010001 'm': 1111 'n': 101 'o': 1110 'p': 10011 'r': 10010 's': 1100 't': 01111 'u': 01110 'x': 01001 encoded: 0111111010011110000000111100000100010100001010100110001111100110100010101000001011101001000011010111000100010111110001010000101101011011110011000011101010000 decoded: this is an example for huffman encoding

using System ;

using System.Collections.Generic ;



namespace Huffman_Encoding

{

public class PriorityQueue < T > where T : IComparable

{

protected List < T > LstHeap = new List < T > ( ) ;



public virtual int Count

{

get { return LstHeap . Count ; }

}



public virtual void Add ( T val )

{

LstHeap . Add ( val ) ;

SetAt ( LstHeap . Count - 1 , val ) ;

UpHeap ( LstHeap . Count - 1 ) ;

}



public virtual T Peek ( )

{

if ( LstHeap . Count == 0 )

{

throw new IndexOutOfRangeException ( "Peeking at an empty priority queue" ) ;

}



return LstHeap [ 0 ] ;

}



public virtual T Pop ( )

{

if ( LstHeap . Count == 0 )

{

throw new IndexOutOfRangeException ( "Popping an empty priority queue" ) ;

}



T valRet = LstHeap [ 0 ] ;



SetAt ( 0 , LstHeap [ LstHeap . Count - 1 ] ) ;

LstHeap . RemoveAt ( LstHeap . Count - 1 ) ;

DownHeap ( 0 ) ;

return valRet ;

}



protected virtual void SetAt ( int i, T val )

{

LstHeap [ i ] = val ;

}



protected bool RightSonExists ( int i )

{

return RightChildIndex ( i ) < LstHeap . Count ;

}



protected bool LeftSonExists ( int i )

{

return LeftChildIndex ( i ) < LstHeap . Count ;

}



protected int ParentIndex ( int i )

{

return ( i - 1 ) / 2 ;

}



protected int LeftChildIndex ( int i )

{

return 2 * i + 1 ;

}



protected int RightChildIndex ( int i )

{

return 2 * ( i + 1 ) ;

}



protected T ArrayVal ( int i )

{

return LstHeap [ i ] ;

}



protected T Parent ( int i )

{

return LstHeap [ ParentIndex ( i ) ] ;

}



protected T Left ( int i )

{

return LstHeap [ LeftChildIndex ( i ) ] ;

}



protected T Right ( int i )

{

return LstHeap [ RightChildIndex ( i ) ] ;

}



protected void Swap ( int i, int j )

{

T valHold = ArrayVal ( i ) ;

SetAt ( i, LstHeap [ j ] ) ;

SetAt ( j, valHold ) ;

}



protected void UpHeap ( int i )

{

while ( i > 0 && ArrayVal ( i ) . CompareTo ( Parent ( i ) ) > 0 )

{

Swap ( i, ParentIndex ( i ) ) ;

i = ParentIndex ( i ) ;

}

}



protected void DownHeap ( int i )

{

while ( i >= 0 )

{

int iContinue = - 1 ;



if ( RightSonExists ( i ) && Right ( i ) . CompareTo ( ArrayVal ( i ) ) > 0 )

{

iContinue = Left ( i ) . CompareTo ( Right ( i ) ) < 0 ? RightChildIndex ( i ) : LeftChildIndex ( i ) ;

}

else if ( LeftSonExists ( i ) && Left ( i ) . CompareTo ( ArrayVal ( i ) ) > 0 )

{

iContinue = LeftChildIndex ( i ) ;

}



if ( iContinue >= 0 && iContinue < LstHeap . Count )

{

Swap ( i, iContinue ) ;

}



i = iContinue ;

}

}

}



internal class HuffmanNode < T > : IComparable

{

internal HuffmanNode ( double probability, T value )

{

Probability = probability ;

LeftSon = RightSon = Parent = null ;

Value = value ;

IsLeaf = true ;

}



internal HuffmanNode ( HuffmanNode < T > leftSon, HuffmanNode < T > rightSon )

{

LeftSon = leftSon ;

RightSon = rightSon ;

Probability = leftSon . Probability + rightSon . Probability ;

leftSon . IsZero = true ;

rightSon . IsZero = false ;

leftSon . Parent = rightSon . Parent = this ;

IsLeaf = false ;

}



internal HuffmanNode < T > LeftSon { get ; set ; }

internal HuffmanNode < T > RightSon { get ; set ; }

internal HuffmanNode < T > Parent { get ; set ; }

internal T Value { get ; set ; }

internal bool IsLeaf { get ; set ; }



internal bool IsZero { get ; set ; }



internal int Bit

{

get { return IsZero ? 0 : 1 ; }

}



internal bool IsRoot

{

get { return Parent == null ; }

}



internal double Probability { get ; set ; }



public int CompareTo ( object obj )

{

return - Probability . CompareTo ( ( ( HuffmanNode < T > ) obj ) . Probability ) ;

}

}



public class Huffman < T > where T : IComparable

{

private readonly Dictionary < T, HuffmanNode < T >> _leafDictionary = new Dictionary < T, HuffmanNode < T >> ( ) ;

private readonly HuffmanNode < T > _root ;



public Huffman ( IEnumerable < T > values )

{

var counts = new Dictionary < T, int > ( ) ;

var priorityQueue = new PriorityQueue < HuffmanNode < T >> ( ) ;

int valueCount = 0 ;



foreach ( T value in values )

{

if ( ! counts . ContainsKey ( value ) )

{

counts [ value ] = 0 ;

}

counts [ value ] ++;

valueCount ++;

}



foreach ( T value in counts . Keys )

{

var node = new HuffmanNode < T > ( ( double ) counts [ value ] / valueCount, value ) ;

priorityQueue . Add ( node ) ;

_leafDictionary [ value ] = node ;

}



while ( priorityQueue . Count > 1 )

{

HuffmanNode < T > leftSon = priorityQueue . Pop ( ) ;

HuffmanNode < T > rightSon = priorityQueue . Pop ( ) ;

var parent = new HuffmanNode < T > ( leftSon, rightSon ) ;

priorityQueue . Add ( parent ) ;

}



_root = priorityQueue . Pop ( ) ;

_root . IsZero = false ;

}



public List < int > Encode ( T value )

{

var returnValue = new List < int > ( ) ;

Encode ( value , returnValue ) ;

return returnValue ;

}



public void Encode ( T value , List < int > encoding )

{

if ( ! _leafDictionary . ContainsKey ( value ) )

{

throw new ArgumentException ( "Invalid value in Encode" ) ;

}

HuffmanNode < T > nodeCur = _leafDictionary [ value ] ;

var reverseEncoding = new List < int > ( ) ;

while ( ! nodeCur . IsRoot )

{

reverseEncoding . Add ( nodeCur . Bit ) ;

nodeCur = nodeCur . Parent ;

}



reverseEncoding . Reverse ( ) ;

encoding . AddRange ( reverseEncoding ) ;

}



public List < int > Encode ( IEnumerable < T > values )

{

var returnValue = new List < int > ( ) ;



foreach ( T value in values )

{

Encode ( value , returnValue ) ;

}

return returnValue ;

}



/// <summary>
/// Decodes one symbol from <paramref name="bitString"/>, starting at
/// <paramref name="position"/> and walking the tree from the root until
/// a leaf is reached (0 = left son, anything else = right son).
/// </summary>
/// <param name="bitString">Bits to decode.</param>
/// <param name="position">
/// Index of the first bit to read; advanced past the bits consumed.
/// </param>
/// <returns>The value stored in the leaf that was reached.</returns>
/// <exception cref="ArgumentException">
/// The bits run out before a leaf is reached.
/// </exception>
public T Decode ( List < int > bitString, ref int position )
{
    HuffmanNode < T > nodeCur = _root ;
    while ( ! nodeCur . IsLeaf )
    {
        // position == Count is already past the last valid index, so the
        // check must be >= for the intended exception to be raised
        if ( position >= bitString . Count )
        {
            throw new ArgumentException ( "Invalid bitstring in Decode" ) ;
        }
        nodeCur = bitString [ position ++ ] == 0 ? nodeCur . LeftSon : nodeCur . RightSon ;
    }
    return nodeCur . Value ;
}



public List < T > Decode ( List < int > bitString )

{

int position = 0 ;

var returnValue = new List < T > ( ) ;



while ( position != bitString . Count )

{

returnValue . Add ( Decode ( bitString, ref position ) ) ;

}

return returnValue ;

}

}



internal class Program

{

private const string Example = "this is an example for huffman encoding" ;



private static void Main ( )

{

var huffman = new Huffman < char > ( Example ) ;

List < int > encoding = huffman . Encode ( Example ) ;

List < char > decoding = huffman . Decode ( encoding ) ;

var outString = new string ( decoding . ToArray ( ) ) ;

Console . WriteLine ( outString == Example ? "Encoding/decoding worked" : "Encoding/Decoding failed" ) ;



var chars = new HashSet < char > ( Example ) ;

foreach ( char c in chars )

{

encoding = huffman . Encode ( c ) ;

Console . Write ( "{0}: " , c ) ;

foreach ( int bit in encoding )

{

Console . Write ( "{0}" , bit ) ;

}

Console . WriteLine ( ) ;

}

Console . ReadKey ( ) ;

}

}

}

This code builds a tree to generate huffman codes, then prints the codes.

#include <algorithm>
#include <climits> // for CHAR_BIT
#include <cstddef> // for NULL
#include <iostream>
#include <iterator>
#include <map>
#include <queue>
#include <vector>



const int UniqueSymbols = 1 << CHAR_BIT ;

const char * SampleString = "this is an example for huffman encoding" ;



typedef std :: vector < bool > HuffCode ;

typedef std :: map < char , HuffCode > HuffCodeMap ;



class INode

{

public :

const int f ;



virtual ~INode ( ) { }



protected :

INode ( int f ) : f ( f ) { }

} ;



// Inner tree node. Its frequency is the sum of its children's
// frequencies. The node owns both children and deletes them in its
// destructor, so deleting the root releases the whole tree.
class InternalNode : public INode
{
public:
    INode *const left;
    INode *const right;

    InternalNode(INode* c0, INode* c1) : INode(c0->f + c1->f), left(c0), right(c1) {}
    ~InternalNode()
    {
        delete left;
        delete right;
    }
};



class LeafNode : public INode

{

public :

const char c ;



LeafNode ( int f, char c ) : INode ( f ) , c ( c ) { }

} ;



struct NodeCmp

{

bool operator ( ) ( const INode * lhs, const INode * rhs ) const { return lhs - > f > rhs - > f ; }

} ;



// Build the Huffman tree for a table of symbol frequencies.
//
// frequencies: one count per possible char value; 0 means "unused".
//
// Returns the root of a newly allocated tree (release it with delete),
// or NULL when every frequency is zero. With a single used symbol the
// root is that symbol's leaf.
INode* BuildTree(const int (&frequencies)[UniqueSymbols])
{
    std::priority_queue<INode*, std::vector<INode*>, NodeCmp> trees;

    for (int i = 0; i < UniqueSymbols; ++i)
    {
        if (frequencies[i] != 0)
            trees.push(new LeafNode(frequencies[i], (char)i));
    }

    // top() on an empty queue is undefined, so report "no tree" instead
    if (trees.empty())
        return NULL;

    // repeatedly merge the two least frequent subtrees
    while (trees.size() > 1)
    {
        INode* childR = trees.top();
        trees.pop();

        INode* childL = trees.top();
        trees.pop();

        INode* parent = new InternalNode(childR, childL);
        trees.push(parent);
    }
    return trees.top();
}



// Walk the tree below node and record the code of every leaf.
//
// node:     subtree to visit; a null pointer is ignored because both
//           dynamic_casts then yield null.
// prefix:   bits accumulated on the path from the root to node.
// outCodes: receives symbol -> code; false is appended for the left
//           branch and true for the right branch.
void GenerateCodes(const INode* node, const HuffCode& prefix, HuffCodeMap& outCodes)
{
    if (const LeafNode* lf = dynamic_cast<const LeafNode*>(node))
    {
        outCodes[lf->c] = prefix;
    }
    else if (const InternalNode* in = dynamic_cast<const InternalNode*>(node))
    {
        HuffCode leftPrefix = prefix;
        leftPrefix.push_back(false);
        GenerateCodes(in->left, leftPrefix, outCodes);

        HuffCode rightPrefix = prefix;
        rightPrefix.push_back(true);
        GenerateCodes(in->right, rightPrefix, outCodes);
    }
}



// Counts the characters of SampleString, builds the Huffman tree and
// prints one "symbol code" line per distinct character.
int main()
{
    // Build frequency table. The character is converted to unsigned char
    // before indexing: plain char may be signed, and a negative value
    // would index outside the table.
    int frequencies[UniqueSymbols] = {0};
    for (const char *ptr = SampleString; *ptr != '\0'; ++ptr)
        ++frequencies[static_cast<unsigned char>(*ptr)];

    INode *root = BuildTree(frequencies);

    HuffCodeMap codes;
    GenerateCodes(root, HuffCode(), codes);
    delete root; // the codes are copied out, the tree is no longer needed

    for (HuffCodeMap::const_iterator it = codes.begin(); it != codes.end(); ++it)
    {
        std::cout << it->first << " ";
        std::copy(it->second.begin(), it->second.end(),
                  std::ostream_iterator<bool>(std::cout));
        std::cout << std::endl;
    }
    return 0;
}

Output:

110 a 1001 c 101010 d 10001 e 1111 f 1011 g 101011 h 0101 i 1110 l 01110 m 0011 n 000 o 0010 p 01000 r 01001 s 0110 t 01111 u 10100 x 10000

(Updated to 1.6 & includes pretty-printing). Uses Java PriorityQueue

( require ' [ clojure . pprint : refer : all ] )



(defn probs
  "Returns a map from each distinct element of `s` to its relative
  frequency (count divided by the total number of elements)."
  [s]
  (let [freqs (frequencies s)
        sum   (apply + (vals freqs))]
    (into {} (map (fn [[k v]] [k (/ v sum)]) freqs))))



(defn init-pq
  "Returns a java.util.PriorityQueue holding one
  {:symbol item, :priority prob} map per [item prob] pair of
  `weighted-items`, ordered by ascending :priority."
  [weighted-items]
  (let [comp (proxy [java.util.Comparator] []
               (compare [a b] (compare (:priority a) (:priority b))))
        pq   (java.util.PriorityQueue. (count weighted-items) comp)]
    (doseq [[item prob] weighted-items]
      (.add pq {:symbol item, :priority prob}))
    pq))



(defn huffman-tree
  "Destructively reduces the priority queue `pq` to a single tree:
  while more than one node is queued, the two lowest-priority nodes are
  removed and replaced by a node {:priority sum, :left a, :right b}.
  Returns the remaining node (nil when `pq` was empty)."
  [pq]
  (while (> (.size pq) 1)
    (let [a        (.poll pq)
          b        (.poll pq)
          new-node {:priority (+ (:priority a) (:priority b))
                    :left     a
                    :right    b}]
      (.add pq new-node)))
  (.poll pq))



(defn symbol-map
  "Flattens the tree `t` into a sequence of
  {:symbol s, :weight w, :code c} maps, one per leaf. `code` is the bit
  string accumulated so far: \\0 is appended when descending :left and
  \\1 when descending :right. A node counts as a leaf when it has a
  truthy :symbol."
  ([t] (symbol-map t ""))
  ([{:keys [symbol priority left right] :as t} code]
   (if symbol
     [{:symbol symbol :weight priority :code code}]
     (concat (symbol-map left (str code \0))
             (symbol-map right (str code \1))))))



(defn huffman-encode
  "Returns the Huffman code table of the symbols in `items`: symbol
  probabilities -> priority queue -> tree -> sequence of
  {:symbol :weight :code} maps."
  [items]
  (-> items probs init-pq huffman-tree symbol-map))

(defn display-huffman-encode
  "Prints the code table of `s` with print-table, heaviest symbol first."
  [s]
  (->> s huffman-encode (sort-by :weight >) print-table))

(display-huffman-encode "this is an example for huffman encoding")

Output:

| :symbol | :weight | :code | |---------+---------+--------| | | 2/13 | 111 | | n | 4/39 | 011 | | a | 1/13 | 1001 | | e | 1/13 | 1011 | | i | 1/13 | 1100 | | f | 1/13 | 1101 | | h | 2/39 | 0001 | | s | 2/39 | 0010 | | m | 2/39 | 0100 | | o | 2/39 | 0101 | | d | 1/39 | 00000 | | t | 1/39 | 00001 | | c | 1/39 | 00110 | | x | 1/39 | 00111 | | u | 1/39 | 10000 | | l | 1/39 | 10001 | | r | 1/39 | 10100 | | g | 1/39 | 101010 | | p | 1/39 | 101011 |

Alternate Version [ edit ]

Uses c.d.priority-map. Creates a more shallow tree but appears to meet the requirements.

( require ' [ clojure . data . priority - map : refer [ priority - map - keyfn - by ] ] )

( require ' [ clojure . pprint : refer [ print - table ] ] )



(defn init-pq
  "Returns a priority map with one entry per distinct element of `s`:
  the element itself is the key and {:sym element, :weight fraction} is
  the value, with :weight being the element's share of `s`. Entries are
  ordered by ascending :weight."
  [s]
  (let [c (count s)]
    (->> s
         frequencies
         (map (fn [[k v]] [k {:sym k :weight (/ v c)}]))
         (into (priority-map-keyfn-by :weight <)))))



(defn huffman-tree
  "Reduces the priority map `pq` to a single Huffman tree and returns
  its root node. Each step replaces the two lightest nodes a and b with
  a node whose :sym is the concatenation of both :sym values, whose
  :weight is the sum of both weights, and whose :left/:right are a/b.
  The new node is stored under its :sym."
  [pq]
  (letfn [(build-step [pq]
            (let [a  (second (peek pq))
                  b  (second (peek (pop pq)))
                  nn {:sym    (str (:sym a) (:sym b))
                      :weight (+ (:weight a) (:weight b))
                      :left   a
                      :right  b}]
              (assoc (pop (pop pq)) (:sym nn) nn)))]
    (->> (iterate build-step pq)
         (drop-while #(> (count %) 1))
         first
         vals
         first)))



(defn symbol-map
  "Walks the tree `m` and returns, for a leaf, the map
  {:sym s, :weight w, :code c}, and for an inner node a vector of the
  results of its :left and :right subtrees (nested like the tree).
  \\0 is appended to the code for a left branch, \\1 for a right branch.
  The steps return thunks that are run by trampoline."
  [m]
  (letfn [(sym-step [{:keys [sym weight left right] :as m} code]
            (cond
              (and left right)
              #(vector (trampoline sym-step left (str code \0))
                       (trampoline sym-step right (str code \1)))

              left  #(sym-step left (str code \0))
              right #(sym-step right (str code \1))
              :else {:sym sym :weight weight :code code}))]
    (trampoline sym-step m "")))



(defn huffman-encode
  "Returns a flat sequence of {:sym :weight :code} maps, one per
  distinct element of `s`."
  [s]
  (->> s init-pq huffman-tree symbol-map flatten))

(defn display-huffman-encode
  "Prints the code table of `s` with print-table, heaviest symbol first."
  [s]
  (->> s huffman-encode (sort-by :weight >) print-table))

(display-huffman-encode "this is an example for huffman encoding")

Output:

| :sym | :weight | :code | |------+---------+-------| | | 2/13 | 101 | | n | 4/39 | 010 | | a | 1/13 | 1001 | | i | 1/13 | 1101 | | e | 1/13 | 1110 | | f | 1/13 | 1111 | | m | 2/39 | 0000 | | o | 2/39 | 0001 | | s | 2/39 | 0010 | | h | 2/39 | 11001 | | g | 1/39 | 00110 | | l | 1/39 | 00111 | | t | 1/39 | 01100 | | u | 1/39 | 01101 | | c | 1/39 | 01110 | | d | 1/39 | 01111 | | p | 1/39 | 10000 | | r | 1/39 | 10001 | | x | 1/39 | 11000 |



huffman_encoding_table = (counts) ->
  # counts is a hash where keys are characters and
  # values are frequencies;
  # return a hash where keys are codes and values
  # are characters.
  # An empty counts hash yields an empty table; a hash with a single
  # character yields the one-bit code "0" for it.

  build_huffman_tree = ->
    # returns a Huffman tree (null when counts is empty). Each node has
    #   cnt: total frequency of all chars in subtree
    #   c: character to be encoded (leafs only)
    #   children: children nodes (branches only)
    q = min_queue()
    for c, cnt of counts
      q.enqueue cnt,
        cnt: cnt
        c: c
    return null if q.size() is 0
    # merge the two least frequent nodes until only the root is left
    while q.size() >= 2
      a = q.dequeue()
      b = q.dequeue()
      cnt = a.cnt + b.cnt
      node =
        cnt: cnt
        children: [a, b]
      q.enqueue cnt, node
    q.dequeue()

  root = build_huffman_tree()

  codes = {}
  return codes unless root?

  encode = (node, code) ->
    if node.c?
      # a lone root leaf would otherwise get the empty code
      codes[code or "0"] = node.c
    else
      encode node.children[0], code + "0"
      encode node.children[1], code + "1"

  encode(root, "")
  codes



min_queue = ->
  # This is very non-optimized; you could use a binary heap for better
  # performance. Items with smaller priority get dequeued first; items
  # with equal priority are dequeued in insertion order.
  arr = []

  # Insert data before the first queued item whose priority is larger.
  enqueue: (priority, data) ->
    i = 0
    while i < arr.length
      if priority < arr[i].priority
        break
      i += 1
    arr.splice i, 0,
      priority: priority
      data: data

  # Remove and return the data with the smallest priority
  # (undefined when the queue is empty).
  dequeue: ->
    arr.shift()?.data

  size: -> arr.length

  # Expose the backing array (for debugging).
  _internal: ->
    arr



# Returns a hash mapping each element of s to its number of occurrences.
freq_count = (s) ->
  cnts = {}
  for c in s
    cnts[c] ?= 0
    cnts[c] += 1
  cnts



# Pads s on the right with spaces until it is at least n characters long.
rpad = (s, n) ->
  while s.length < n
    s += ' '
  s



examples = [
  "this is an example for huffman encoding"
  "abcd"
  "abbccccddddddddeeeeeeeee"
]

# Print the code table of every example, sorted by code.
for s in examples
  console.log "---- #{s}"
  counts = freq_count(s)
  huffman_table = huffman_encoding_table(counts)
  codes = (code for code of huffman_table).sort()
  for code in codes
    c = huffman_table[code]
    console.log "#{rpad(code, 5)}: #{c} (#{counts[c]})"
  console.log()



Output:

> coffee huffman.coffee ---- this is an example for huffman encoding 000 : n (4) 0010 : s (2) 0011 : m (2) 0100 : o (2) 01010: t (1) 01011: x (1) 01100: p (1) 01101: l (1) 01110: r (1) 01111: u (1) 10000: c (1) 10001: d (1) 1001 : i (3) 101 : (6) 1100 : a (3) 1101 : e (3) 1110 : f (3) 11110: g (1) 11111: h (2) ---- abcd 00 : a (1) 01 : b (1) 10 : c (1) 11 : d (1) ---- abbccccddddddddeeeeeeeee 0 : e (9) 1000 : a (1) 1001 : b (2) 101 : c (4) 11 : d (8)

This implementation uses a tree built of huffman-nodes, and a hash table mapping from elements of the input sequence to huffman-nodes. The priority queue is implemented as a sorted list. (For a more efficient implementation of a priority queue, see the Heapsort task.)

;;; A node of the Huffman tree. Leaves carry ELEMENT and, once codes have
;;; been assigned, ENCODING; inner nodes carry LEFT and RIGHT children.
(defstruct huffman-node
  (weight 0 :type number)
  (element nil :type t)
  (encoding nil :type (or null bit-vector))
  (left nil :type (or null huffman-node))
  (right nil :type (or null huffman-node)))



(defun initial-huffman-nodes (sequence &key (test 'eql))
  "Create one leaf node per distinct element of SEQUENCE (a non-empty
sequence; elements are compared with the hash-table test TEST).
Each node's weight is the element's relative frequency in SEQUENCE.
Returns two values: a hash table mapping elements to their nodes, and a
list of the nodes sorted by ascending weight."
  (let* ((length (length sequence))
         (increment (/ 1 length))
         (nodes (make-hash-table :size length :test test))
         (queue '()))
    (map nil #'(lambda (element)
                 (multiple-value-bind (node presentp) (gethash element nodes)
                   (if presentp
                       (incf (huffman-node-weight node) increment)
                       (let ((node (make-huffman-node :weight increment
                                                      :element element)))
                         (setf (gethash element nodes) node
                               queue (list* node queue))))))
         sequence)
    (values nodes (sort queue '< :key 'huffman-node-weight))))



(defun huffman-tree (sequence &key (test 'eql))
  "Build the Huffman tree for the elements of SEQUENCE.
Returns two values: the hash table mapping elements to their leaf nodes,
and the root node of the tree."
  (multiple-value-bind (nodes queue)
      (initial-huffman-nodes sequence :test test)
    ;; QUEUE stays sorted by weight: take the two lightest nodes, join
    ;; them under a new parent and merge the parent back into the rest.
    (do () ((endp (rest queue)) (values nodes (first queue)))
      (destructuring-bind (n1 n2 &rest queue-rest) queue
        (let ((n3 (make-huffman-node
                   :left n1
                   :right n2
                   :weight (+ (huffman-node-weight n1)
                              (huffman-node-weight n2)))))
          (setf queue (merge 'list (list n3) queue-rest '<
                             :key 'huffman-node-weight)))))))



(defun huffman-codes (sequence &key (test 'eql))
  "Compute the Huffman encoding of every element of SEQUENCE.
Returns the hash table mapping elements to their leaf nodes, with the
ENCODING slot of each leaf set to its code as a bit vector (0 for a left
branch, 1 for a right branch, root first)."
  (multiple-value-bind (nodes tree)
      (huffman-tree sequence :test test)
    (labels ((hc (node length bits)
               ;; BITS holds the path to NODE, most recent bit first;
               ;; LENGTH is the number of bits in it.
               (let ((left (huffman-node-left node))
                     (right (huffman-node-right node)))
                 (cond
                   ((and (null left) (null right))
                    (setf (huffman-node-encoding node)
                          (make-array length :element-type 'bit
                                             :initial-contents (reverse bits))))
                   (t (hc left (1+ length) (list* 0 bits))
                      (hc right (1+ length) (list* 1 bits)))))))
      (hc tree 0 '())
      nodes)))



(defun print-huffman-code-table (nodes &optional (out *standard-output*))
  "Print a three-column table (element, weight, code) to OUT with one
row per node stored in the hash table NODES."
  (format out "~&Element~10tWeight~20tCode")
  (loop for node being each hash-value of nodes
        do (format out "~&~s~10t~s~20t~s"
                   (huffman-node-element node)
                   (huffman-node-weight node)
                   (huffman-node-encoding node))))

Example:

> (print-huffman-code-table (huffman-codes "this is an example for huffman encoding")) Element Weight Code #\t 1/39 #*10010 #\d 1/39 #*01101 #\m 2/39 #*0100 #\f 1/13 #*1100 #\o 2/39 #*0111 #\x 1/39 #*100111 #\h 2/39 #*1000 #\a 1/13 #*1010 #\s 2/39 #*0101 #\c 1/39 #*00010 #\l 1/39 #*00001 #\u 1/39 #*00011 #\e 1/13 #*1101 #

4/39 #*001 #\g 1/39 #*01100 #\p 1/39 #*100110 #\i 1/13 #*1011 #\r 1/39 #*00000 #\Space 2/13 #*111

import std. stdio , std. algorithm , std. typecons , std. container , std. array ;



/// Computes Huffman codes for a range of (symbol, count) groups as
/// produced by std.algorithm.group.
///
/// Each heap entry is a tuple (weight, [(symbol, code), ...]). The two
/// lightest entries are merged repeatedly: '0' is prepended to every
/// code of the lighter entry and '1' to every code of the other.
///
/// Returns the (symbol, code) pairs sorted by code length, then symbol.
auto encode(alias eq, R)(Group!(eq, R) sf) /*pure nothrow @safe*/ {
    // "b < a" turns the default max-heap into a min-heap.
    auto heap = sf.map!(s => tuple(s[1], [tuple(s[0], "")]))
                  .array.heapify!q{b < a};

    while (heap.length > 1) {
        auto lo = heap.front; heap.removeFront;
        auto hi = heap.front; heap.removeFront;
        lo[1].each!((ref pair) => pair[1] = '0' ~ pair[1]);
        hi[1].each!((ref pair) => pair[1] = '1' ~ pair[1]);
        heap.insert(tuple(lo[0] + hi[0], lo[1] ~ hi[1]));
    }
    return heap.front[1].schwartzSort!q{ tuple(a[1].length, a[0]) };
}



/// Prints the Huffman code of every character of the sample sentence.
void main() /*@safe*/ {
    immutable s = "this is an example for huffman encoding"d;
    // group needs equal characters to be adjacent, hence the sort
    foreach (const p; s.dup.sort().group.encode)
        writefln("'%s'  %s", p[]);
}

Output:

' ' 101 'n' 010 'a' 1001 'e' 1100 'f' 1101 'h' 0001 'i' 1110 'm' 0010 'o' 0011 's' 0111 'g' 00000 'l' 00001 'p' 01100 'r' 01101 't' 10000 'u' 10001 'x' 11110 'c' 111110 'd' 111111

Adapted C# solution.



class HUFFMAN_NODE [T -> COMPARABLE]
		-- Node of a Huffman tree: either a leaf holding a value or an
		-- inner node holding two children.

inherit
	COMPARABLE
		redefine
			three_way_comparison
		end

create
	leaf_node, inner_node

feature {NONE} -- Initialization

	leaf_node (a_probability: REAL_64; a_value: T)
			-- Make a childless, parentless node for `a_value'
			-- weighing `a_probability'.
		do
			is_leaf := True
			value := a_value
			probability := a_probability

			parent := Void
			left := Void
			right := Void
		end

	inner_node (a_left, a_right: HUFFMAN_NODE [T])
			-- Make a node with children `a_left' (bit 0) and
			-- `a_right' (bit 1), weighing as much as both together.
		do
			is_leaf := False
			left := a_left
			right := a_right

			a_left.set_parent (Current)
			a_right.set_parent (Current)
			a_left.set_is_zero (True)
			a_right.set_is_zero (False)

			probability := a_left.probability + a_right.probability
		end

feature -- Access

	probability: REAL_64
			-- Weight of the subtree rooted here.

	value: detachable T
			-- Encoded value (set by `leaf_node' only).

	is_leaf: BOOLEAN
			-- Was this node created with `leaf_node'?

	is_zero: BOOLEAN assign set_is_zero
			-- Is the branch leading to this node labelled 0?

	left: detachable HUFFMAN_NODE [T]
	right: detachable HUFFMAN_NODE [T]
	parent: detachable HUFFMAN_NODE [T] assign set_parent

	is_root: BOOLEAN
			-- Has this node no parent?
		do
			Result := parent = Void
		end

	bit_value: INTEGER
			-- 0 if `is_zero', 1 otherwise.
		do
			Result := 1
			if is_zero then
				Result := 0
			end
		end

feature -- Element change

	set_is_zero (a_value: BOOLEAN)
			-- Set `is_zero' to `a_value'.
		do
			is_zero := a_value
		end

	set_parent (a_parent: detachable HUFFMAN_NODE [T])
			-- Set `parent' to `a_parent'.
		do
			parent := a_parent
		end

feature -- comparable implementation

	is_less alias "<" (other: like Current): BOOLEAN
			-- Does `three_way_comparison' rank Current below `other'?
		do
			Result := three_way_comparison (other) < 0
		end

	three_way_comparison (other: like Current): INTEGER
			-- Comparison of the probabilities with the sign reversed:
			-- the node with the smaller probability ranks higher.
		do
			Result := - probability.three_way_comparison (other.probability)
		end

end



class HUFFMAN
		-- Huffman code built from the characters of a string.

create
	make

feature {NONE} -- Initialization

	make (a_string: STRING)
			-- Build the tree and the leaf dictionary for `a_string'.
		require
			non_empty_string: a_string.count > 0
		local
			l_queue: HEAP_PRIORITY_QUEUE [HUFFMAN_NODE [CHARACTER]]
			l_counts: HASH_TABLE [INTEGER, CHARACTER]
			l_node: HUFFMAN_NODE [CHARACTER]
			l_left, l_right: HUFFMAN_NODE [CHARACTER]
		do
			create l_queue.make (a_string.count)
			create l_counts.make (10)

				-- Count the occurrences of every character.
			across a_string as char loop
				if l_counts.has (char.item) then
					l_counts.replace (l_counts.at (char.item) + 1, char.item)
				else
					l_counts.put (1, char.item)
				end
			end

			create leaf_dictionary.make (l_counts.count)

				-- One leaf per character, weighted by relative frequency.
			across l_counts as kv loop
				create l_node.leaf_node ((kv.item * 1.0) / a_string.count, kv.key)
				l_queue.put (l_node)
				leaf_dictionary.put (l_node, kv.key)
			end

				-- Join the two front nodes until a single tree is left.
			from
			until
				l_queue.count <= 1
			loop
				l_left := l_queue.item
				l_queue.remove
				l_right := l_queue.item
				l_queue.remove

				create l_node.inner_node (l_left, l_right)
				l_queue.put (l_node)
			end

			root := l_queue.item
			root.set_is_zero (False)
		end

feature -- Access

	root: HUFFMAN_NODE [CHARACTER]
			-- Root of the tree.

	leaf_dictionary: HASH_TABLE [HUFFMAN_NODE [CHARACTER], CHARACTER]
			-- Leaf of every encodable character.

	encode (a_value: CHARACTER): STRING
			-- Code of `a_value' as a string of 0s and 1s, root bit first.
		require
			encodable: leaf_dictionary.has (a_value)
		local
			l_node: HUFFMAN_NODE [CHARACTER]
		do
			Result := ""
			if attached leaf_dictionary.item (a_value) as attached_node then
					-- Collect the bits from the leaf up to the root ...
				from
					l_node := attached_node
				until
					l_node.is_root
				loop
					Result.append_integer (l_node.bit_value)
					if attached l_node.parent as parent then
						l_node := parent
					end
				end
					-- ... then reverse them into root-to-leaf order.
				Result.mirror
			end
		end

end



class
	APPLICATION
		-- Prints the Huffman code of every character of a sample sentence.

create
	make

feature {NONE}

	make -- entry point
			-- Encode the sample sentence and print one "character: code"
			-- line per distinct character, in character order.
		local
			l_str: STRING
			huff: HUFFMAN
			chars: BINARY_SEARCH_TREE_SET [CHARACTER]
		do
			l_str := "this is an example for huffman encoding"

			create huff.make (l_str)

				-- The set keeps each character once.
			create chars.make
			chars.fill (l_str)

			from
				chars.start
			until
				chars.off
			loop
					-- "%N" is the newline escape; it must not be padded
					-- with spaces.
				print (chars.item.out + ": " + huff.encode (chars.item) + "%N")
				chars.forth
			end
		end

end



Output:

: 101 a: 1001 c: 01110 d: 01111 e: 1111 f: 1100 g: 01001 h: 11101 i: 1101 l: 10001 m: 0010 n: 000 o: 0011 p: 10000 r: 11100 s: 0110 t: 01000 u: 01011 x: 01010

The main part of the code used here is extracted from Michel Rijnders' GitHubGist. See also his blog, for a complete description of the original module.

-module(huffman).

-export([encode/1, decode/2, main/0]).

%% @doc Huffman-encodes the non-empty string Text.
%% Returns {Code, Tree, Dict}: Code is the encoded bitstring, Tree is the
%% code tree (nested 2-tuples with symbols at the leaves; a bare symbol
%% when Text has one distinct symbol) and Dict maps symbols to codes.
encode(Text) ->
    Tree = tree(freq_table(Text)),
    Dict = dict:from_list(codewords(Tree)),
    Code = << <<(dict:fetch(Char, Dict))/bitstring>> || Char <- Text >>,
    {Code, Tree, Dict}.

%% @doc Decodes the bitstring Code with the tree returned by encode/1.
decode(Code, Tree) ->
    decode(Code, Tree, Tree, []).

%% @doc Prints the code table, the encoded bits and the decoded text of
%% the sample sentence.
main() ->
    {Code, Tree, Dict} = encode("this is an example for huffman encoding"),
    [begin
         io:format("~s: ", [[Key]]),
         print_bits(Value)
     end || {Key, Value} <- lists:sort(dict:to_list(Dict))],
    io:format("encoded: "),
    print_bits(Code),
    io:format("decoded: "),
    io:format("~s\n", [decode(Code, Tree)]).

%% decode(Bits, Tree, Node, Acc): Node is the current position in Tree.
%% A 2-tuple child is an inner node to descend into; anything else is a
%% symbol, which is emitted before restarting from the root.
decode(<<>>, _, _, Result) ->
    lists:reverse(Result);
%% Single-symbol tree: the root is the symbol itself, one bit per symbol.
decode(<<_:1, Rest/bits>>, Tree, Tree, Result) when not is_tuple(Tree) ->
    decode(<<Rest/bits>>, Tree, Tree, [Tree | Result]);
decode(<<0:1, Rest/bits>>, Tree, {L = {_, _}, _R}, Result) ->
    decode(<<Rest/bits>>, Tree, L, Result);
decode(<<0:1, Rest/bits>>, Tree, {L, _R}, Result) ->
    decode(<<Rest/bits>>, Tree, Tree, [L | Result]);
decode(<<1:1, Rest/bits>>, Tree, {_L, R = {_, _}}, Result) ->
    decode(<<Rest/bits>>, Tree, R, Result);
decode(<<1:1, Rest/bits>>, Tree, {_L, R}, Result) ->
    decode(<<Rest/bits>>, Tree, Tree, [R | Result]).

%% Returns the list of {Symbol, Code} pairs of a tree: 0 for the left
%% branch, 1 for the right branch.
codewords({L, R}) ->
    codewords(L, <<0:1>>) ++ codewords(R, <<1:1>>);
%% A tree that is a single symbol gets the one-bit code 0.
codewords(Symbol) ->
    [{Symbol, <<0:1>>}].

codewords({L, R}, <<Bits/bits>>) ->
    codewords(L, <<Bits/bits, 0:1>>) ++ codewords(R, <<Bits/bits, 1:1>>);
codewords(Symbol, <<Bits/bitstring>>) ->
    [{Symbol, Bits}].

%% Builds the tree from a non-empty list of {Node, Count} pairs by
%% repeatedly pairing the two entries with the smallest counts.
tree([{N, _} | []]) ->
    N;
tree(Ns) ->
    [{N1, C1}, {N2, C2} | Rest] = lists:keysort(2, Ns),
    tree([{{N1, N2}, C1 + C2} | Rest]).

%% Returns a list of {Symbol, Count} pairs for Text.
freq_table(Text) ->
    freq_table(lists:sort(Text), []).

%% The input is sorted, so equal symbols form one run at the head.
freq_table([], Acc) ->
    Acc;
freq_table([S | Rest], Acc) ->
    {Block, MoreBlocks} = lists:splitwith(fun (X) -> X == S end, Rest),
    freq_table(MoreBlocks, [{S, 1 + length(Block)} | Acc]).

%% Prints a bitstring as 0/1 digits followed by a newline.
print_bits(<<>>) ->
    io:format("\n");
print_bits(<<Bit:1, Rest/bitstring>>) ->
    io:format("~w", [Bit]),
    print_bits(Rest).

Output:

: 111 a: 1011 c: 10010 d: 100111 e: 1010 f: 1101 g: 100110 h: 1000 i: 1100 l: 00001 m: 0101 n: 001 o: 0100 p: 00000 r: 00011 s: 0111 t: 00010 u: 01101 x: 01100 encoded: 0001010001100011111111000111111101100111110100110010110101000000000110101111101010000011111100001101110111010101101100111110100011001001001001111100001100110 decoded: this is an example for huffman encoding

Translation of: OCaml

/// A Huffman tree over symbols of type 'a. Every node carries a weight:
/// a leaf the frequency of its symbol, an inner node the combined weight
/// of its two subtrees.
type 'a HuffmanTree =
    | Leaf of int * 'a
    | Node of int * 'a HuffmanTree * 'a HuffmanTree

/// Weight stored in the root of a tree.
let freq = function Leaf (f, _) | Node (f, _, _) -> f

/// Orders two trees by ascending weight.
let freqCompare a b = compare (freq a) (freq b)



/// Builds the Huffman tree for a list of (symbol, frequency) pairs.
/// The two lightest trees are joined under a new node and the list is
/// re-sorted, until a single tree is left.
/// Fails with "empty list" when charFreqs is empty.
let buildTree charFreqs =
    let leaves = List.map (fun (c, f) -> Leaf (f, c)) charFreqs
    let freqSort = List.sortWith freqCompare
    let rec aux = function
        | [] -> failwith "empty list"
        | [a] -> a
        | a :: b :: tl ->
            let node = Node (freq a + freq b, a, b)
            aux (freqSort (node :: tl))
    aux (freqSort leaves)



/// Prints one tab-separated "symbol, weight, code" line per leaf.
/// The argument is a pair (code, tree) where code lists the bits on the
/// path to tree, most recent first: "0" is added for the left subtree
/// and "1" for the right one.
let rec printTree = function
    | code, Leaf (f, c) ->
        printfn "%c\t%d\t%s" c f (String.concat "" (List.rev code))
    | code, Node (_, l, r) ->
        printTree ("0" :: code, l)
        printTree ("1" :: code, r)



// Entry point: count the characters of the sample sentence, build the
// tree and print the code table.
let () =
    let str = "this is an example for huffman encoding"
    let charFreqs =
        str
        |> Seq.groupBy id
        |> Seq.map (fun (c, vals) -> (c, Seq.length vals))
        |> Map.ofSeq

    let tree = charFreqs |> Map.toList |> buildTree
    printfn "Symbol\tWeight\tHuffman code"
    printTree ([], tree)

Output:

Symbol Weight Huffman code p 1 00000 r 1 00001 g 1 00010 l 1 00011 n 4 001 m 2 0100 o 2 0101 c 1 01100 d 1 01101 h 2 0111 s 2 1000 x 1 10010 t 1 100110 u 1 100111 f 3 1010 i 3 1011 a 3 1100 e 3 1101 6 111



USING: kernel sequences combinators accessors assocs math hashtables math.order

sorting.slots classes formatting prettyprint ;



IN: huffman



! -------------------------------------

! CLASSES -----------------------------

! -------------------------------------



TUPLE: huffman-node

weight element encoding left right ;



! For nodes

: <huffman-tnode> ( left right -- huffman )

huffman-node new [ left<< ] [ swap >>right ] bi ;



! For leafs

: <huffman-node> ( element -- huffman )

1 swap f f f huffman-node boa ;





! --------------------------------------

! INITIAL HASHTABLE --------------------

! --------------------------------------



<PRIVATE



! Increment node if it already exists

! Else make it and add it to the hash-table

: huffman-gen ( element nodes -- )

2dup at

[ [ [ 1 + ] change-weight ] change-at ]

[ [ dup <huffman-node> swap ] dip set-at ] if ;



! Curry node-hash. Then each over the seq

! to get the weighted values

: (huffman) ( nodes seq -- nodes )

dup [ [ huffman-gen ] curry each ] dip ;



! ---------------------------------------

! TREE GENERATION -----------------------

! ---------------------------------------



: (huffman-weight) ( node1 node2 -- weight )

[ weight>> ] dup bi* + ;



! Combine two nodes into the children of a parent

! node which has a weight equal to their collective

! weight

: (huffman-combine) ( node1 node2 -- node3 )

[ (huffman-weight) ]

[ <huffman-tnode> ] 2bi

swap >>weight ;



! Generate a tree by combining nodes

! in the priority queue until we're

! left with the root node

: (huffman-tree) ( nodes -- tree )

dup rest empty?

[ first ] [

{ { weight>> <=> } } sort-by

[ rest rest ] [ first ]

[ second ] tri

(huffman-combine) prefix

(huffman-tree)

] if ; recursive



! --------------------------------------

! ENCODING -----------------------------

! --------------------------------------



: (huffman-leaf?) ( node -- bool )

[ left>> huffman-node instance? ]

[ right>> huffman-node instance? ] bi and not ;



: (huffman-leaf) ( leaf bit -- )

swap encoding<< ;



DEFER: (huffman-encoding)



! Recursively walk the nodes left and right.
! Despite the names in the stack effect, the first input is the tree
! node (left>> and right>> are applied to it) and the second is the
! code accumulated so far: the left child is visited with the code
! extended by 0 and the right child with the code extended by 1.
: (huffman-node) ( bit nodes -- )
    [ 0 suffix ] [ 1 suffix ] bi
    [ [ left>> ] [ right>> ] bi ] 2dip
    [ swap ] dip
    [ (huffman-encoding) ] 2bi@ ;



: (huffman-encoding) ( bit nodes -- )

over (huffman-leaf?)

[ (huffman-leaf) ]

[ (huffman-node) ] if ;



PRIVATE>



! -------------------------------

! USER WORDS --------------------

! -------------------------------



: huffman-print ( nodes -- )

"Element" "Weight" "Code" "

%10s\t%10s\t%6s

" printf

{ { weight>> >=< } } sort-by

[ [ encoding>> ] [ element>> ] [ weight>> ] tri

"%8c\t%7d\t\t" printf pprint "

" printf ] each ;



: huffman ( sequence -- nodes )

H{ } clone (huffman) values

[ (huffman-tree) { } (huffman-encoding) ] keep ;



! ---------------------------------

! USAGE ---------------------------

! ---------------------------------



! { 1 2 3 4 } huffman huffman-print

! "this is an example of a huffman tree" huffman huffman-print



! Element Weight Code

! 7 { 0 0 0 }

! a 4 { 1 1 1 }

! e 4 { 1 1 0 }

! f 3 { 0 0 1 0 }

! h 2 { 1 0 1 0 }

! i 2 { 0 1 0 1 }

! m 2 { 0 1 0 0 }

! n 2 { 0 1 1 1 }

! s 2 { 0 1 1 0 }

! t 2 { 0 0 1 1 }

! l 1 { 1 0 1 1 1 }

! o 1 { 1 0 1 1 0 }

! p 1 { 1 0 0 0 1 }

! r 1 { 1 0 0 0 0 }

! u 1 { 1 0 0 1 1 }

! x 1 { 1 0 0 1 0 }





** Base class of the tree nodes; holds the probability (weight)
** of the subtree rooted at the node.
class Node

{

Float probability := 0.0f

}



** Leaf node: a character code together with its probability.
class Leaf : Node

{

Int character



** Creates a leaf for 'character' with the given 'probability'.
new make (Int character, Float probability)

{

this.character = character

this.probability = probability

}

}



** Inner node with two children.
class Branch : Node

{

Node left

Node right



** Creates a branch over 'left' and 'right'; its probability is
** the sum of the probabilities of both children.
new make (Node left, Node right)

{

this.left = left

this.right = right

probability = this.left.probability + this.right.probability

}

}



** Computes a Huffman code table for a list of character codes.
class Huffman
{
  ** Nodes still to be merged; holds only the root once the tree is built.
  Node[] queue := [,]

  ** Maps each character (as a one-character Str) to its bit string.
  Str:Str table := [:]

  ** Builds the tree and the code table for 'items'.
  new make (Int[] items)
  {
    uniqueItems := items.dup.unique
    uniqueItems.each |Int item|
    {
      // probability = occurrences of item / total number of items
      num := items.findAll { it == item }.size
      queue.add (Leaf(item, num.toFloat / items.size))
    }
    createTree
    createTable
  }

  ** Merges the two least probable nodes under a new Branch until a
  ** single node remains in 'queue'.
  Void createTree ()
  {
    while (queue.size > 1)
    {
      queue.sort |a,b| {a.probability <=> b.probability}
      node1 := queue.removeAt (0)
      node2 := queue.removeAt (0)
      queue.add (Branch (node1, node2))
    }
  }

  ** Records in 'table' the code of every leaf below 'node'; 'encoding'
  ** is the path so far ("0" = left branch, "1" = right branch).
  Void traverse (Node node, Str encoding)
  {
    if (node is Leaf)
    {
      table[(node as Leaf).character.toChar] = encoding
    }
    else // (node is Branch)
    {
      traverse ((node as Branch).left, encoding + "0")
      traverse ((node as Branch).right, encoding + "1")
    }
  }

  ** Fills 'table' from the finished tree; does nothing unless 'queue'
  ** holds exactly one node.
  Void createTable ()
  {
    if (queue.size != 1) return // error!
    traverse (queue.first, "")
  }

  ** Returns the table as text, one "char -> code" line per character,
  ** sorted by character.
  override Str toStr ()
  {
    result := "Huffman Encoding Table:\n"
    table.keys.sort.each |Str key|
    {
      result += "$key -> ${table[key]}\n"
    }
    return result
  }
}



** Example program.
class Main

{

** Prints the sample sentence followed by its Huffman encoding table.
public static Void main ()

{

example := "this is an example for huffman encoding"

huffman := Huffman (example.chars)

echo ("From \"$example\"")

echo (huffman)

}

}



Output:

From "this is an example for huffman encoding" Huffman Encoding Table: -> 101 a -> 1100 c -> 10000 d -> 10001 e -> 1101 f -> 1110 g -> 11110 h -> 11111 i -> 1001 l -> 01101 m -> 0011 n -> 000 o -> 0100 p -> 01100 r -> 01110 s -> 0010 t -> 01010 u -> 01111 x -> 01011

! output:

! d-> 00000, t-> 00001, h-> 0001, s-> 0010,

! c-> 00110, x-> 00111, m-> 0100, o-> 0101,

! n-> 011, u-> 10000, l-> 10001, a-> 1001,

! r-> 10100, g-> 101010, p-> 101011,

! e-> 1011, i-> 1100, f-> 1101, -> 111

!

! 00001|0001|1100|0010|111|1100|0010|111|1001|011|

! 111|1011|00111|1001|0100|101011|10001|1011|111|

! 1101|0101|10100|111|0001|10000|1101|1101|0100|

! 1001|011|111|1011|011|00110|0101|00000|1100|011|101010|

!

module huffman
  implicit none

  ! Longest code that can be stored per symbol. A Huffman code over 256
  ! symbols can be up to 255 bits long; a shorter buffer would silently
  ! truncate codes when "0"/"1" is prepended in join.
  integer, parameter, private :: max_code_len = 255

  ! A (sub)tree, stored flat: the symbols it contains, the code built so
  ! far for each of them, and the total frequency.
  type node
    character(len=1), allocatable :: sym(:)
    character(len=max_code_len), allocatable :: code(:)
    integer :: freq
  contains
    procedure :: show => show_node
  end type

  ! Binary min-heap of nodes keyed on freq; buf(1:n) is the live part.
  type queue
    type(node), allocatable :: buf(:)
    integer :: n = 0
  contains
    procedure :: extractmin
    procedure :: append
    procedure :: siftdown
  end type

contains

  ! Moves the element at index a down until neither child is lighter.
  subroutine siftdown(this, a)
    class(queue) :: this
    integer :: a, parent, child
    associate (x => this%buf)
      parent = a
      do while (parent*2 <= this%n)
        child = parent*2
        ! pick the lighter of the two children
        if (child + 1 <= this%n) then
          if (x(child + 1)%freq < x(child)%freq) then
            child = child + 1
          end if
        end if
        if (x(parent)%freq > x(child)%freq) then
          x([child, parent]) = x([parent, child])
          parent = child
        else
          exit
        end if
      end do
    end associate
  end subroutine

  ! Removes and returns the node with the smallest freq.
  ! Stops the program when the queue is empty.
  function extractmin(this) result(res)
    class(queue) :: this
    type(node) :: res
    if (this%n < 1) error stop "extractmin: queue is empty"
    res = this%buf(1)
    this%buf(1) = this%buf(this%n)
    this%n = this%n - 1
    call this%siftdown(1)
  end function

  ! Adds x to the queue, doubling the buffer when it is full.
  subroutine append(this, x)
    class(queue), intent(inout) :: this
    type(node) :: x
    type(node), allocatable :: tmp(:)
    integer :: i
    this%n = this%n + 1
    if (.not. allocated(this%buf)) allocate(this%buf(1))
    if (size(this%buf) < this%n) then
      allocate(tmp(2*size(this%buf)))
      tmp(1:this%n - 1) = this%buf
      call move_alloc(tmp, this%buf)
    end if
    this%buf(this%n) = x
    ! restore the heap order along the path from the new element to the root
    i = this%n
    do
      i = i/2
      if (i == 0) exit
      call this%siftdown(i)
    end do
  end subroutine

  ! Joins two subtrees: the result holds the symbols of a with "0"
  ! prepended to their codes, followed by those of b with "1" prepended.
  function join(a, b) result(c)
    type(node) :: a, b, c
    integer :: i, n, n1
    n1 = size(a%sym)
    n = n1 + size(b%sym)
    c%freq = a%freq + b%freq
    allocate(c%sym(n), c%code(n))
    do i = 1, n1
      c%sym(i) = a%sym(i)
      c%code(i) = "0" // trim(a%code(i))
    end do
    do i = 1, size(b%sym)
      c%sym(i + n1) = b%sym(i)
      c%code(i + n1) = "1" // trim(b%code(i))
    end do
  end function

  ! Prints the "symbol-> code" pairs of the node on one line.
  subroutine show_node(this)
    class(node) :: this
    integer :: i
    write(*, "(*(g0,'-> ',g0,:,', '))", advance="no") &
      (this%sym(i), trim(this%code(i)), i = 1, size(this%sym))
    print *
  end subroutine

  ! Returns a single-symbol node with an empty code.
  function create(letter, freq) result(this)
    character :: letter
    integer :: freq
    type(node) :: this
    allocate(this%sym(1), this%code(1))
    this%sym(1) = letter; this%code(1) = ""
    this%freq = freq
  end function
end module



! Builds the Huffman code of a sample sentence, prints the code table
! and then the sentence encoded symbol by symbol, separated by '|'.
program main
  use huffman
  implicit none
  character(len=*), parameter :: txt = &
    "this is an example for huffman encoding"
  integer :: i, k, nmerge, freq(0:255) = 0
  type(queue) :: Q
  type(node) :: x, lo, hi

  ! count the occurrences of every character code
  do i = 1, len(txt)
    freq(ichar(txt(i:i))) = freq(ichar(txt(i:i))) + 1
  end do
  do i = 0, 255
    if (freq(i) > 0) then
      call Q%append(create(char(i), freq(i)))
    end if
  end do

  ! n nodes need n-1 merges. The two minima are extracted in separate
  ! statements: extractmin modifies Q, and the order in which the
  ! arguments of a single call are evaluated is not defined.
  nmerge = Q%n - 1
  do i = 1, nmerge
    lo = Q%extractmin()
    hi = Q%extractmin()
    call Q%append(join(lo, hi))
  end do
  x = Q%extractmin()
  call x%show()

  ! encode the text: look every character up in the symbol table
  do i = 1, len(txt)
    do k = 1, size(x%sym)
      if (x%sym(k) == txt(i:i)) exit
    end do
    write(*, "(a,'|')", advance="no") trim(x%code(k))
  end do
  print *
end program



Translation of: Java

package main



import (

"container/heap"

"fmt"

)



// HuffmanTree is any node of a Huffman tree; Freq reports the weight
// of the subtree rooted at the node.
type HuffmanTree interface {
	Freq() int
}

// HuffmanLeaf is a single symbol and its frequency.
type HuffmanLeaf struct {
	freq  int
	value rune
}

// HuffmanNode is an inner node; freq is the combined weight of both
// subtrees.
type HuffmanNode struct {
	freq        int
	left, right HuffmanTree
}

// Freq returns the frequency of the leaf's symbol.
func (leaf HuffmanLeaf) Freq() int { return leaf.freq }

// Freq returns the weight stored in the node.
func (node HuffmanNode) Freq() int { return node.freq }



// treeHeap implements heap.Interface as a min-heap of trees keyed on
// their frequency.
type treeHeap []HuffmanTree

// Len reports the number of queued trees.
func (th treeHeap) Len() int {
	return len(th)
}

// Less orders trees by ascending frequency.
func (th treeHeap) Less(i, j int) bool {
	return th[i].Freq() < th[j].Freq()
}

// Swap exchanges the trees at positions i and j.
func (th treeHeap) Swap(i, j int) {
	th[i], th[j] = th[j], th[i]
}

// Push appends ele, which must be a HuffmanTree.
func (th *treeHeap) Push(ele interface{}) {
	*th = append(*th, ele.(HuffmanTree))
}

// Pop removes and returns the last element of the slice.
func (th *treeHeap) Pop() interface{} {
	old := *th
	last := len(old) - 1
	item := old[last]
	*th = old[:last]
	return item
}



// buildTree returns the Huffman tree for the given symbol frequencies,
// or nil when symFreqs is empty (popping from an empty heap would
// panic).
func buildTree(symFreqs map[rune]int) HuffmanTree {
	if len(symFreqs) == 0 {
		return nil
	}

	var trees treeHeap
	for c, f := range symFreqs {
		trees = append(trees, HuffmanLeaf{f, c})
	}
	heap.Init(&trees)
	for trees.Len() > 1 {
		// two trees with least frequency
		a := heap.Pop(&trees).(HuffmanTree)
		b := heap.Pop(&trees).(HuffmanTree)

		// put into new node and re-insert into queue
		heap.Push(&trees, HuffmanNode{a.Freq() + b.Freq(), a, b})
	}
	return heap.Pop(&trees).(HuffmanTree)
}



func printCodes ( tree HuffmanTree , prefix [] byte ) {

switch i := tree . ( type ) {

case HuffmanLeaf :

// print out symbol, frequency, and code for this

// leaf (which is just the prefix)

fmt . Printf ( "%c \t %d \t %s

" , i . value , i . freq , string ( prefix ))

case HuffmanNode :

// traverse left

prefix = append ( prefix , '0' )

printCodes ( i . left , prefix )

prefix = prefix [: len ( prefix ) - 1 ]



// traverse right

prefix = append ( prefix , '1' )

printCodes ( i . right , prefix )

prefix = prefix [: len ( prefix ) - 1 ]

}

}



// main prints the Huffman code table of a sample sentence.
func main() {
	test := "this is an example for huffman encoding"

	// read each symbol and record the frequencies
	symFreqs := make(map[rune]int)
	for _, c := range test {
		symFreqs[c]++
	}

	// build tree
	tree := buildTree(symFreqs)

	// print out results
	fmt.Println("SYMBOL\tWEIGHT\tHUFFMAN CODE")
	printCodes(tree, []byte{})
}

Output:

SYMBOL WEIGHT HUFFMAN CODE n 4 000 m 2 0010 o 2 0011 s 2 0100 u 1 01010 p 1 01011 h 2 0110 d 1 01110 c 1 01111 t 1 10000 l 1 10001 x 1 10010 r 1 100110 g 1 100111 i 3 1010 e 3 1011 6 110 f 3 1110 a 3 1111

Translation of: Python

package main



import (

"container/heap"

"fmt"

)



// coded pairs a symbol with the code built for it so far.
type coded struct {
	sym  rune
	code string
}

// counted is a group of symbols together with their combined
// frequency.
type counted struct {
	total int
	syms  []coded
}



// cHeap is a min-heap of symbol groups keyed on their total frequency.
type cHeap []counted

// satisfy heap.Interface

// Len reports the number of queued groups.
func (c cHeap) Len() int {
	return len(c)
}

// Less orders groups by ascending total.
func (c cHeap) Less(i, j int) bool {
	return c[i].total < c[j].total
}

// Swap exchanges the groups at positions i and j.
func (c cHeap) Swap(i, j int) {
	c[i], c[j] = c[j], c[i]
}

// Push appends ele, which must be a counted.
func (c *cHeap) Push(ele interface{}) {
	*c = append(*c, ele.(counted))
}

// Pop removes and returns the last element of the slice.
func (c *cHeap) Pop() interface{} {
	old := *c
	last := len(old) - 1
	item := old[last]
	*c = old[:last]
	return item
}



// encode returns the Huffman code of every symbol in sym2freq, or nil
// when sym2freq is empty (popping from an empty heap would panic).
//
// The two lightest groups are merged repeatedly: "0" is prepended to
// every code of the lighter group and "1" to every code of the other.
func encode(sym2freq map[rune]int) []coded {
	if len(sym2freq) == 0 {
		return nil
	}

	var ch cHeap
	for sym, freq := range sym2freq {
		ch = append(ch, counted{freq, []coded{{sym: sym}}})
	}
	heap.Init(&ch)
	for len(ch) > 1 {
		a := heap.Pop(&ch).(counted)
		b := heap.Pop(&ch).(counted)
		for i, c := range a.syms {
			a.syms[i].code = "0" + c.code
		}
		for i, c := range b.syms {
			b.syms[i].code = "1" + c.code
		}
		heap.Push(&ch, counted{a.total + b.total, append(a.syms, b.syms...)})
	}
	return heap.Pop(&ch).(counted).syms
}



const txt = "this is an example for huffman encoding"



func main () {

sym2freq := make ( map [ rune ] int )

for _ , c := range txt {

sym2freq [ c ] ++

}

table := encode ( sym2freq )

fmt . Println ( "Symbol Weight Huffman Code" )

for _ , c := range table {

fmt . Printf ( " %c %d %s

" , c . sym , sym2freq [ c . sym ], c . code )

}

}

Implemented and tested with Groovy 2.3.



import groovy.transform.*



// A node of the Huffman tree. Leaves carry a single letter; the internal
// nodes built in huffmanCode carry the concatenated letters of their children
// and the sum of their frequencies.
// The declaration order of the fields matters: huffmanCode constructs leaves
// positionally as new Node(letter, freq).
@Canonical

// Ordering used by the TreeSet queue: compare on 'freq', then on 'letter'.
@Sortable ( includes = [ 'freq' , 'letter' ] )

class Node {

String letter

int freq

Node left

Node right

// A node without children is a leaf.
boolean isLeaf ( ) { left == null && right == null }

}



/**
 * Walks the tree rooted at n and records the bit string of every leaf.
 *
 * @param n       root of the (sub)tree to walk
 * @param corresp map that receives letter -> code entries; also the return value
 * @param prefix  bits accumulated on the way down to n
 * @return corresp, filled with one entry per leaf below n
 */
Map correspondance(Node n, Map corresp = [:], String prefix = '') {
    if (n.isLeaf()) {
        // A tree consisting of a single leaf would otherwise get the empty
        // code; fall back to '0' so that every symbol costs at least one bit.
        corresp[n.letter] = prefix ?: '0'
    } else {
        correspondance(n.left, corresp, prefix + '0')
        correspondance(n.right, corresp, prefix + '1')
    }
    return corresp
}



/**
 * Builds the Huffman code table (letter -> bit string) for a message.
 *
 * @param message text whose character frequencies drive the code
 * @return map from each distinct character (as a String) to its code;
 *         an empty map for an empty or null message
 */
Map huffmanCode(String message) {
    // No characters means no tree: without this guard the queue would be
    // empty and the tree walk below would receive a null root.
    if (!message) {
        return [:]
    }

    // Character frequencies, turned into leaf nodes and kept in a TreeSet,
    // which hands the nodes back in ascending (freq, letter) order.
    def queue = message.toList().countBy { it }
        .collect { String letter, int freq ->
            new Node(letter, freq)
        } as TreeSet

    // Repeatedly merge the two lightest nodes until only the root is left.
    while (queue.size() > 1) {
        def (nodeLeft, nodeRight) = [queue.pollFirst(), queue.pollFirst()]

        queue << new Node(
            freq: nodeLeft.freq + nodeRight.freq,
            letter: nodeLeft.letter + nodeRight.letter,
            left: nodeLeft, right: nodeRight
        )
    }

    return correspondance(queue.pollFirst())
}



/**
 * Encodes msg by replacing every character with its entry in codeTable and
 * concatenating the results.
 */
String encode(CharSequence msg, Map codeTable) {
    def pieces = msg.collect { symbol -> codeTable.get(symbol) }
    return pieces.join('')
}



/**
 * Decodes a bit string produced by encode.
 *
 * The message is scanned left to right; at each position the table entry
 * whose code matches there is emitted and skipped. Decoding stops at the
 * first position where no code matches. The scan is iterative so that long
 * messages cannot exhaust the call stack.
 *
 * @param codedMsg  string of '0'/'1' characters
 * @param codeTable map from symbol to code, as returned by huffmanCode
 * @param decoded   text appended after the decoded symbols (defaults to '')
 * @return the decoded symbols followed by decoded
 */
String decode(String codedMsg, Map codeTable, String decoded = '') {
    def out = new StringBuilder()
    int pos = 0
    while (pos < codedMsg.size()) {
        // Empty codes are ignored: they would match everywhere without
        // consuming any input.
        def pair = codeTable.find { k, v -> v && codedMsg.startsWith(v, pos) }
        if (!pair) {
            break
        }
        out << pair.key
        pos += pair.value.size()
    }
    out << decoded
    return out.toString()
}



Usage:



// Demonstration: build the code table for a sample sentence, print it, then
// encode the sentence and decode it again.
def message = "this is an example for huffman encoding"



def codeTable = huffmanCode ( message )

// One "letter: code" line per table entry.
codeTable. each { k, v -> println "$k: $v" }



def encoded = encode ( message, codeTable )

println encoded



def decoded = decode ( encoded, codeTable )

println decoded



Output:

g: 00000 l: 00001 h: 0001 m: 0010 o: 0011 n: 010 p: 01100 r: 01101 s: 0111 t: 10000 u: 10001 a: 1001 : 101 e: 1100 f: 1101 i: 1110 x: 11110 c: 111110 d: 111111 1000000011110011110111100111101100101010111001111010010010011000000111001011101001101101101000110001110111010010100101010111000101111100011111111111001000000 this is an example for huffman encoding

Credits go to huffman where you'll also find a non-tree solution. Uses sorted list as a priority queue.

import Data.List (group, insertBy, sort, sortBy)
import Control.Arrow ((&&&), second)
import Data.Ord (comparing)

-- | A Huffman tree: symbols live in the leaves, branches carry no payload.
data HTree a
  = Leaf a
  | Branch (HTree a)
           (HTree a)
  deriving (Show, Eq, Ord)

-- | Print one @'c' : bits@ line for every distinct character of the input.
test :: String -> IO ()
test =
  mapM_ (\(a, b) -> putStrLn ('\'' : a : ("' : " ++ b))) .
  serialize . huffmanTree . freq

-- | List every leaf with the path leading to it: '0' for the first branch,
-- '1' for the second.
serialize :: HTree a -> [(a, String)]
serialize (Branch l r) =
  (second ('0' :) <$> serialize l) ++ (second ('1' :) <$> serialize r)
serialize (Leaf x) = [(x, "")]

-- | Build the tree from (weight, symbol) pairs. The pairs are kept in a list
-- sorted by weight, which serves as the priority queue; 'hstep' is applied
-- until a single entry remains. The input must not be empty.
huffmanTree
  :: (Ord w, Num w)
  => [(w, a)] -> HTree a
huffmanTree =
  snd .
  head . until (null . tail) hstep . sortBy (comparing fst) . fmap (second Leaf)

-- | Merge the two lightest entries and insert the result at its sorted
-- position. Only defined for lists with at least two entries, which is what
-- 'huffmanTree' guarantees.
hstep
  :: (Ord a, Num a)
  => [(a, HTree b)] -> [(a, HTree b)]
hstep ((w1, t1):(w2, t2):wts) =
  insertBy (comparing fst) (w1 + w2, Branch t1 t2) wts

-- | Histogram of the input as (count, element) pairs, ordered by element.
freq
  :: Ord a
  => [a] -> [(Int, a)]
freq = fmap (length &&& head) . group . sort

main :: IO ()
main = test "this is an example for huffman encoding"

Output:

'p' : 00000

'r' : 00001

'g' : 00010

'l' : 00011

'n' : 001

'm' : 0100

'o' : 0101

'c' : 01100

'd' : 01101

'h' : 0111

's' : 1000

'x' : 10010

't' : 100110

'u' : 100111

'f' : 1010

'i' : 1011

'a' : 1100

'e' : 1101

' ' : 111

Using Set as a priority queue

(might be worth it for bigger alphabets):

import qualified Data . Set as S



-- | Collapse a non-empty set of weighted trees into a single Huffman tree.
-- The set acts as the priority queue: the two minimal entries are removed and
-- their merge is inserted again, until removing the minimum leaves nothing.
-- Thanks to laziness the second 'S.deleteFindMin' is only evaluated when the
-- first guard has established that @ts_1@ is non-empty.
htree :: (Ord t, Num t, Ord a) => S.Set (t, HTree a) -> HTree a
htree ts
  | S.null ts_1 = t1
  | otherwise = htree ts_3
  where
    ((w1, t1), ts_1) = S.deleteFindMin ts
    ((w2, t2), ts_2) = S.deleteFindMin ts_1
    ts_3 = S.insert (w1 + w2, Branch t1 t2) ts_2



-- | Build the Huffman tree for a list of (weight, symbol) pairs by wrapping
-- every symbol in a leaf and handing the resulting set to 'htree'.
huffmanTree :: (Ord w, Num w, Ord a) => [(w, a)] -> HTree a
huffmanTree weighted = htree (S.fromList [(w, Leaf sym) | (w, sym) <- weighted])

A non-tree version

This produces the output required without building the Huffman tree at all, by building all the trace strings directly while reducing the histogram:

import Data.List (sortBy, insertBy, sort, group)
import Control.Arrow (second, (&&&))
import Data.Ord (comparing)

-- | Histogram of the input as (count, element) pairs, ordered by element.
freq :: Ord a => [a] -> [(Int, a)]
freq = map (length &&& head) . group . sort

-- | Compute the code table directly from a histogram, without building a
-- tree: every queue entry carries the partial codes of the symbols it covers,
-- and merging two entries prepends '0' to the first group and '1' to the
-- second. The result is sorted by symbol; an empty histogram gives [].
huffman :: [(Int, Char)] -> [(Char, String)]
huffman = reduce . map (\(p, c) -> (p, [(c, "")])) . sortBy (comparing fst)
  where
    add (p1, xs1) (p2, xs2) =
      (p1 + p2, map (second ('0' :)) xs1 ++ map (second ('1' :)) xs2)
    reduce [] = []
    reduce [(_, ys)] = sortBy (comparing fst) ys
    reduce (x1:x2:xs) = reduce $ insertBy (comparing fst) (add x1 x2) xs

-- | Print one @'c' : bits@ line for every distinct character of the input.
test :: String -> IO ()
test s = mapM_ (\(a, b) -> putStrLn ('\'' : a : "\' : " ++ b)) . huffman . freq $ s

main :: IO ()
main = do
  test "this is an example for huffman encoding"

record huffnode ( l , r , n , c ) # internal and leaf nodes

record huffcode ( c , n , b , i ) # encoding table char, freq, bitstring, bits (int)



# Demonstration driver: counts the characters of a sample string, builds the
# Huffman tree, collects one huffcode record per leaf and prints the table.
procedure main ( )



s := "this is an example for huffman encoding"



Count := huffcount ( s ) # frequency count

Tree := huffTree ( Count ) # heap and tree



Code := [ ] # extract encodings

CodeT := table ( )

# Each result of huffBits is a bit string followed by the leaf character:
# x[-1] is the character, x[1:-1] the bits, "2r"||b the bits read as a
# base-2 integer.
# NOTE(review): CodeT[c] appears before c := x[-1] in the same expression;
# confirm which key it is stored under (CodeT is not read afterwards here).
every x := huffBits ( Tree ) do

put ( Code , CodeT [ c ] := huffcode ( c := x [ - 1 ] , Count [ c ] . n , b := x [ 1 :- 1 ] , integer ( "2r" || b ) ) )





Code := sortf ( Code , 1 ) # show table in char order

write ( "Input String : " , image ( s ) )

write ( right ( "char" , 5 ) , right ( "freq" , 5 ) , " encoding" )

every write ( right ( image ( ( x := ! Code ) . c ) , 5 ) , right ( x . n , 5 ) , " " , x . b )



end



# Generator over the leaves below node N. For a leaf (non-null c field) it
# produces the character itself; for an internal node it produces every result
# of the left subtree prefixed with "0", then every result of the right
# subtree prefixed with "1". Each result is thus a bit string with the leaf
# character as its last element.
procedure huffBits ( N ) # generates huffman bitcodes with trailing character

if \ N . c then return N . c # . append leaf char code

suspend "0" || huffBits ( N . l ) # . left

suspend "1" || huffBits ( N . r ) # . right

end





# Builds the Huffman tree from the huffnode values in T and returns the root.
# Two queues are used: Q1 holds the leaves sorted by weight, Q2 receives the
# merged nodes in the order they are created. A runtime error 205 is raised
# for any element of T that is not a huffnode.
procedure huffTree ( T ) # two queue huffman tree method

local Q1 , Q2 , x , n1 , n2



Q1 := [ ] # queue of characters and weights

every x := ! T do # ensure all are huffnodes

if type ( x ) == "huffnode" then put ( Q1 , x ) else runerr ( 205 , x )

Q1 := sortf ( Q1 , 3 ) # sort by weight ( 3 means by .n )



# Q2 is only created when there is something to merge; while it is null the
# test *\Q2 below fails, so the loop is skipped for zero or one leaf.
if * Q1 > 1 then Q2 := [ ]

while * Q1 +*\ Q2 > 1 do { # While there is more than one node ...



# Take from Q1 when it is non-empty and its head is no heavier than the head
# of Q2 (or Q2 is empty); otherwise take from Q2.
n1 := if Q1 [ 1 ] & ( ( Q1 [ 1 ] . n <= Q2 [ 1 ] . n ) | not Q2 [ 1 ] ) then get ( Q1 ) else get ( Q2 ) # lowest weight from Q1 or Q2

n2 := if Q1 [ 1 ] & ( ( Q1 [ 1 ] . n <= Q2 [ 1 ] . n ) | not Q2 [ 1 ] ) then get ( Q1 ) else get ( Q2 ) # lowest weight from Q1 or Q2



put ( Q2 , huffnode ( n1 , n2 , n1 . n + n2 . n ) ) # new weighted node to end of Q2

}



# The root is the remaining element of Q2 if merging took place, else of Q1.
return ( \ Q2 | Q1 ) [ 1 ] # return the root node

end



# Counts the characters of s. Returns a table keyed by character whose values
# are huffnode records with the character in field c and its number of
# occurrences in field n (the l and r fields are left null).
procedure huffcount ( s ) # return characters and frequencies in a table of huffnodes by char

local c , T



T := table ( )

every c := ! s do {

# Create the node on first sight of the character (/T[c] succeeds only while
# the entry is still null), then bump its counter.
/ T [ c ] := huffnode ( ,, 0 , c )

T [ c ] . n +:= 1

}

return T

end

Output:

Input String : "this is an example for huffman encoding" char freq encoding " " 6 101 "a" 3 1100 "c" 1 10000 "d" 1 10001 "e" 3 1101 "f" 3 1110 "g" 1 11110 "h" 2 11111 "i" 3 1001 "l" 1 01101 "m" 2 0011 "n" 4 000 "o" 2 0100 "p" 1 01100 "r" 1 01110 "s" 2 0010 "t" 1 01010 "u" 1 01111 "x" 1 01011

The following Unicon specific solution takes advantage of the Heap priority queue implementation found in the UniLib Collections package and implements the algorithm given in the problem description. The program produces Huffman codes based on each line of input.

import Collections



# For every line read from standard input: count its characters, build the
# Huffman tree with a Heap ordered by weight, and print the resulting codes.
# Tree nodes are lists: sort(t) yields [char, count] leaves, and merged nodes
# are [&null, weight, left, right].
procedure main ( A )

every line := !& input do {

every ( t := table ( 0 ) ) [ ! line ] +:= 1 # Frequency table

heap := Heap ( sort ( t ) , field , "<" ) # Initial priority queue

while heap . size ( ) > 1 do { # Tree construction

# Remove the two lightest nodes and re-insert their merge.
every ( p1 | p2 ) := heap . get ( )

heap . add ( [ & null , p1 [ 2 ] + p2 [ 2 ] , p1 , p2 ] )

}

codes := treeWalk ( heap . get ( ) , "" ) # Get codes from tree

write ( "Huffman encoding:" ) # Display codes

every pair := ! sort ( codes ) do

write ( " \t '" ,\ pair [ 1 ] , "'-> " , pair [ 2 ] )

}

end



# Returns the second element of a node list, which main uses as the weight.
procedure field ( node ) # selector function for Heap

return node [ 2 ] # field to use for priority ordering

end



# Walks the tree below node and records the code of every leaf in codeMap,
# which is created on the outermost call and returned. prefix holds the bits
# accumulated so far: "0" is appended for element 3, "1" for element 4.
procedure treeWalk ( node , prefix , codeMap )

/ codeMap := table ( "" )

# Interior nodes are the ones whose first element is null.
if / node [ 1 ] then { # interior node

treeWalk ( node [ 3 ] , prefix || "0" , codeMap )

treeWalk ( node [ 4 ] , prefix || "1" , codeMap )

}

else codeMap [ node [ 1 ] ] := prefix

return codeMap

end

A sample run:

->huffman this is an example for huffman encoding Huffman encoding: ' '-> 111 'a'-> 1001 'c'-> 00110 'd'-> 00000 'e'-> 1011 'f'-> 1101 'g'-> 101010 'h'-> 0001 'i'-> 1100 'l'-> 10001 'm'-> 0100 'n'-> 011 'o'-> 0101 'p'-> 101011 'r'-> 10100 's'-> 0010 't'-> 00001 'u'-> 10000 'x'-> 00111 aardvarks are ant eaters Huffman encoding: ' '-> 011 'a'-> 10 'd'-> 0010 'e'-> 010 'k'-> 0011 'n'-> 0001 'r'-> 110 's'-> 1111 't'-> 1110 'v'-> 0000 ->

HuffStuff provides huffman encoding routines

Solution (drawn from the J wiki):

NB. x hc y : x is a list of weights, y the matching list of boxed items.
NB. Repeatedly combines the two items of lowest weight until one is left.
hc=: 4 : 0

NB. A single remaining weight ends the recursion; y is the result.
if. 1 =# x do. y

NB. j: indices of the two smallest weights (first two of the grade-up);
NB. i: all other indices. Recurse on the remaining weights plus the sum of
NB. the two removed ones, and the remaining items plus the two removed items
NB. boxed together.
else. (( i{ x ) ,+/j{ x ) hc ( i{ y ) ,<j{ y [ i=. ( i.# x ) -. j=. 2 {./: x end.

)



NB. x hcodes y : x is a list of non-negative weights, y the matching words.
NB. Validates the arguments, builds the tree with hc and returns one boxed
NB. result per word, in the order of y.
NB. NOTE(review): the last line appears to derive the 0/1 code of each word
NB. from the path of its leaf in t -- confirm against the S: documentation.
hcodes=: 4 : 0

assert. x -:&$ y NB. weights and words have same shape

assert. ( 0 <: x ) *. 1 =#$ x NB. weights are non-negative

assert. 1 >: L. y NB. words are boxed not more than once

w=. ,&.> y NB. standardized words

assert. w -: ~.w NB. words are unique

t=. 0 {:: x hc w NB. minimal weight binary tree

(( < S: 0 t ) i. w ) { <@ ( 1 &= ) @; S: 1 {:: t

)

;" 1 ":L: 0 ( #/.~ ( ],. ( < ' ' ) ,.hcodes ) ,&.>@~. ) 'this is an example for huffman encoding'

t 0 1 0 1 0

h 1 1 1 1 1

i 1 0 0 1

s 0 0 1 0

1 0 1

a 1 1 0 0

n 0 0 0

e 1 1 0 1

x 0 1 0 1 1

m 0 0 1 1

p 0 1 1 0 0

l 0 1 1 0 1

f 1 1 1 0

o 0 1 0 0

r 0 1 1 1 0

u 0 1 1 1 1

c 1 0 0 0 0

d 1 0 0 0 1

g 1 1 1 1 0

This implementation creates an actual tree structure, and then traverses the tree to recover the code.

import java.util.* ;



/**
 * Common base of leaves and internal nodes: every tree has a frequency and
 * trees are ordered by it, which is what the priority queue relies on.
 */
abstract class HuffmanTree implements Comparable<HuffmanTree> {
    /** The frequency (weight) of this tree. */
    public final int frequency;

    public HuffmanTree(int freq) { frequency = freq; }

    /**
     * Compares two trees by frequency.
     *
     * Uses Integer.compare rather than a subtraction, which would overflow
     * and report the wrong order for operands of opposite sign.
     */
    public int compareTo(HuffmanTree tree) {
        return Integer.compare(frequency, tree.frequency);
    }
}



/** A leaf of the Huffman tree: one character together with its frequency. */
class HuffmanLeaf extends HuffmanTree {
    /** The character represented by this leaf. */
    public final char value;

    /**
     * @param freq number of occurrences of the character
     * @param val  the character itself
     */
    public HuffmanLeaf(int freq, char val) {
        super(freq);
        this.value = val;
    }
}



/**
 * An internal node of the Huffman tree. Its frequency is the sum of the
 * frequencies of its two subtrees.
 */
class HuffmanNode extends HuffmanTree {
    /** The two subtrees. */
    public final HuffmanTree left, right;

    /**
     * @param l subtree reached with bit '0'
     * @param r subtree reached with bit '1'
     */
    public HuffmanNode(HuffmanTree l, HuffmanTree r) {
        super(l.frequency + r.frequency);
        this.left = l;
        this.right = r;
    }
}



/** Builds a Huffman tree from character frequencies and prints the codes. */
public class HuffmanCode {

    /**
     * Builds the Huffman tree.
     *
     * @param charFreqs frequencies indexed by character code; entries that
     *                  are not positive are ignored
     * @return the root of the tree, or null when no entry was positive
     */
    public static HuffmanTree buildTree(int[] charFreqs) {
        PriorityQueue<HuffmanTree> trees = new PriorityQueue<HuffmanTree>();

        // Start with a forest of leaves, one per character that occurs.
        int code = 0;
        for (int count : charFreqs) {
            if (count > 0) {
                trees.offer(new HuffmanLeaf(count, (char) code));
            }
            code++;
        }

        assert trees.size() > 0;

        // Merge the two lightest trees until a single one remains.
        while (trees.size() >= 2) {
            HuffmanTree lightest = trees.poll();
            HuffmanTree nextLightest = trees.poll();
            trees.offer(new HuffmanNode(lightest, nextLightest));
        }
        return trees.poll();
    }

    /**
     * Prints one line (character, frequency, code) per leaf below tree.
     *
     * @param tree   root of the (sub)tree to print
     * @param prefix bits accumulated on the way to tree; it is extended
     *               during the walk and restored before returning
     */
    public static void printCodes(HuffmanTree tree, StringBuffer prefix) {
        assert tree != null;

        if (tree instanceof HuffmanLeaf) {
            // For a leaf the accumulated prefix is the complete code.
            HuffmanLeaf leaf = (HuffmanLeaf) tree;
            System.out.println(leaf.value + " \t " + leaf.frequency + " \t " + prefix);
            return;
        }
        if (!(tree instanceof HuffmanNode)) {
            return;
        }

        HuffmanNode node = (HuffmanNode) tree;
        final int depth = prefix.length();

        // Left branch contributes a '0' ...
        prefix.append('0');
        printCodes(node.left, prefix);
        prefix.setLength(depth);

        // ... and the right branch a '1'.
        prefix.append('1');
        printCodes(node.right, prefix);
        prefix.setLength(depth);
    }

    public static void main(String[] args) {
        String test = "this is an example for huffman encoding";

        // For simplicity all characters are assumed to have a code below 256.
        int[] charFreqs = new int[256];
        for (int i = 0; i < test.length(); i++) {
            charFreqs[test.charAt(i)]++;
        }

        HuffmanTree tree = buildTree(charFreqs);

        System.out.println("SYMBOL \t WEIGHT \t HUFFMAN CODE");
        printCodes(tree, new StringBuffer());
    }
}

Output:

SYMBOL WEIGHT HUFFMAN CODE d 1 00000 t 1 00001 h 2 0001 s 2 0010 c 1 00110 x 1 00111 m 2 0100 o 2 0101 n 4 011 u 1 10000 l 1 10001 a 3 1001 r 1 10100 g 1 101010 p 1 101011 e 3 1011 i 3 1100 f 3 1101 6 111

Translation of: Ruby

Works with: SpiderMonkey (for the print() function).

First, use the Binary Heap implementation from here: http://eloquentjavascript.net/appendix2.html

The Huffman encoder

/**
 * Builds a Huffman code for the characters of str and encodes str with it.
 *
 * After construction:
 *   this.str            - the original string
 *   this.encoding       - object mapping each character to its bit string
 *   this.encoded_string - str encoded as a string of '0'/'1' characters
 *
 * An empty string yields an empty encoding and an empty encoded string.
 * A string with a single distinct character gives that character the code
 * "0", so the encoded string still records how many characters there were.
 *
 * Relies on the external BinaryHeap (constructed with a scoring function).
 */
function HuffmanEncoding(str) {
    this.str = str;
    this.encoding = {};
    this.encoded_string = "";

    // Character frequencies.
    var count_chars = {};
    for (var i = 0; i < str.length; i++) {
        if (str[i] in count_chars)
            count_chars[str[i]]++;
        else
            count_chars[str[i]] = 1;
    }

    // Queue entries are [weight, subtree]; a subtree is either a character
    // or a two-element array [left, right].
    var pq = new BinaryHeap(function (x) { return x[0]; });
    var distinct = 0;
    for (var ch in count_chars) {
        pq.push([count_chars[ch], ch]);
        distinct++;
    }

    // Nothing to encode: leave the empty results in place.
    if (distinct === 0)
        return;

    while (pq.size() > 1) {
        var pair1 = pq.pop();
        var pair2 = pq.pop();
        pq.push([pair1[0] + pair2[0], [pair1[1], pair2[1]]]);
    }

    var tree = pq.pop();
    // With one distinct character the root is a leaf and would get the
    // empty code; start from "0" in that case.
    this._generate_encoding(tree[1], distinct === 1 ? "0" : "");

    for (var i = 0; i < this.str.length; i++) {
        this.encoded_string += this.encoding[str[i]];
    }
}



/**
 * Recursively fills this.encoding from a nested-array tree.
 *
 * @param ary    either a character (leaf) or an array [left, right]
 * @param prefix bits accumulated on the way to ary
 */
HuffmanEncoding.prototype._generate_encoding = function (ary, prefix) {
    // Anything that is not an array is a leaf: its code is the prefix.
    if (!(ary instanceof Array)) {
        this.encoding[ary] = prefix;
        return;
    }
    // Element 0 is reached with bit "0", element 1 with bit "1".
    var bits = ["0", "1"];
    for (var side = 0; side < bits.length; side++) {
        this._generate_encoding(ary[side], prefix + bits[side]);
    }
}



/** Prints one "'char': code" line per entry of this.encoding via print(). */
HuffmanEncoding.prototype.inspect_encoding = function () {
    var table = this.encoding;
    for (var symbol in table) {
        var line = "'" + symbol + "': " + table[symbol];
        print(line)
    }
}



/**
 * Decodes a bit string using this object's code table.
 *
 * @param encoded string of '0'/'1' characters
 * @return the decoded text
 * @throws Error when the input ends in the middle of a code or contains a
 *         bit sequence that matches no code; previously such input made the
 *         scan run past the end of the string without ever terminating
 */
HuffmanEncoding.prototype.decode = function (encoded) {
    // Reverse table: code -> character.
    var rev_enc = {};
    for (var ch in this.encoding)
        rev_enc[this.encoding[ch]] = ch;

    var has_code = function (key) {
        return Object.prototype.hasOwnProperty.call(rev_enc, key);
    };

    var decoded = "";
    var pos = 0;
    while (pos < encoded.length) {
        // Extend the candidate one bit at a time until it is a known code.
        // At least one bit is consumed per symbol, so the scan always
        // advances.
        var key = "";
        do {
            if (pos >= encoded.length)
                throw new Error("invalid Huffman bit string: dangling bits '" + key + "'");
            key += encoded[pos];
            pos++;
        } while (!has_code(key));
        decoded += rev_enc[key];
    }
    return decoded;
}

And, using the Huffman encoder

// Demonstration: encode a sample sentence, show the code table and the bit
// string, decode it again and report whether the round trip is lossless.
var s = "this is an example for huffman encoding" ;

print ( s ) ;



var huff = new HuffmanEncoding ( s ) ;

huff. inspect_encoding ( ) ;



var e = huff. encoded_string ;

print ( e ) ;



var t = huff. decode ( e ) ;

print ( t ) ;



print ( "is decoded string same as original? " + ( s == t ) ) ;

Output:

this is an example for huffman encoding 'n': 000 's': 0010 'm': 0011 'o': 0100 't': 01010 'x': 01011 'p': 01100 'l': 01101 'r': 01110 'u': 01111 'c': 10000 'd': 10001 'i': 1001 ' ': 101 'a': 1100 'e': 1101 'f': 1110 'g': 11110 'h': 11111 0101011111100100101011001001010111000001011101010111100001101100011011101101111001000111010111111011111110111000111100000101110100010000010010001100100011110 this is an example for huffman encoding is decoded string same as original? true



# Common supertype of the two node kinds of a Huffman tree.
abstract type HuffmanTree end



# Leaf: a character and its number of occurrences. The field order is the
# positional constructor order, HuffmanLeaf(ch, freq).
struct HuffmanLeaf <: HuffmanTree

ch::Char

freq::Int

end



# Internal node: the combined frequency followed by the two subtrees,
# HuffmanNode(freq, left, right).
struct HuffmanNode <: HuffmanTree

freq::Int

left::HuffmanTree

right::HuffmanTree

end



# Count how often each character occurs in `s`.
# Returns a Dict{Char, Int} mapping character => number of occurrences.
function makefreqdict(s::String)
    counts = Dict{Char, Int}()
    for ch in s
        counts[ch] = get(counts, ch, 0) + 1
    end
    return counts
end



# Build the Huffman tree for a character => frequency table and return its
# root. The working list is re-sorted in descending frequency before every
# merge so that the two lightest trees sit at the end and can be popped.
function huffmantree(ftable::Dict)
    trees::Vector{HuffmanTree} = [HuffmanLeaf(ch, fq) for (ch, fq) in ftable]
    while length(trees) >= 2
        sort!(trees, by = t -> t.freq, rev = true)
        least = pop!(trees)
        nextleast = pop!(trees)
        merged = HuffmanNode(least.freq + nextleast.freq, least, nextleast)
        push!(trees, merged)
    end
    trees[1]
end



# Print one table row for a leaf: the character (a blank is shown as the word
# "space"), its frequency and the code accumulated on the way down.
function printencoding(lf::HuffmanLeaf, code)
    label = lf.ch == ' ' ? "space" : lf.ch
    println(label, "\t", lf.freq, "\t", code)
end



# Print the rows of every leaf below an internal node: the left subtree is
# visited with a '0' appended to the code, the right subtree with a '1'.
# Returns the code the function was called with.
function printencoding(nd::HuffmanNode, code)
    printencoding(nd.left, code * '0')
    printencoding(nd.right, code * '1')
    return string(code)
end



# Demonstration: print the header row, then the code table of a sample
# sentence (frequency table -> tree -> one row per leaf).
const msg = "this is an example for huffman encoding"



println("Char\tFreq\tHuffman code")



printencoding(huffmantree(makefreqdict(msg)), "")



Output:

Char Freq Huffman code p 1 00000 c 1 00001 g 1 00010 x 1 00011 n 4 001 s 2 0100 h 2 0101 u 1 01100 l 1 01101 m 2 0111 o 2 1000 d 1 10010 r 1 100110 t 1 100111 e 3 1010 f 3 1011 a 3 1100 i 3 1101 space 6 111

Translation of: Java

This implementation creates an actual tree structure, and then traverses the tree to recover the code.

import java.util.*



/**
 * Common base of leaves and internal nodes. Trees are ordered by [freq],
 * which is what the priority queue in buildTree relies on.
 */
abstract class HuffmanTree(var freq: Int) : Comparable<HuffmanTree> {
    // Int.compareTo instead of a subtraction, which would overflow and give
    // the wrong sign for operands of opposite sign.
    override fun compareTo(other: HuffmanTree) = freq.compareTo(other.freq)
}



/** A leaf: the character [value] together with its frequency. */
class HuffmanLeaf(freq: Int, var value: Char) : HuffmanTree(freq)



/** An internal node; its frequency is the sum of the frequencies of [left] and [right]. */
class HuffmanNode(var left: HuffmanTree, var right: HuffmanTree) : HuffmanTree(left.freq + right.freq)



/**
 * Builds the Huffman tree for [charFreqs], an array of frequencies indexed by
 * character code. Entries that are not positive are ignored.
 */
fun buildTree(charFreqs: IntArray) : HuffmanTree {
    val trees = PriorityQueue<HuffmanTree>()

    // One leaf per character that actually occurs.
    for ((index, freq) in charFreqs.withIndex()) {
        if (freq > 0) {
            trees.offer(HuffmanLeaf(freq, index.toChar()))
        }
    }

    assert(trees.size > 0)

    // Merge the two lightest trees until a single one remains.
    while (trees.size >= 2) {
        val lightest = trees.poll()
        val nextLightest = trees.poll()
        trees.offer(HuffmanNode(lightest, nextLightest))
    }

    return trees.poll()
}



/**
 * Prints one line (character, frequency, code) per leaf below [tree].
 * [prefix] holds the bits accumulated so far; it is extended while walking
 * and restored to its incoming length before returning.
 */
fun printCodes(tree: HuffmanTree, prefix: StringBuffer) {
    if (tree is HuffmanLeaf) {
        println("${tree.value}\t${tree.freq}\t$prefix")
    } else if (tree is HuffmanNode) {
        val depth = prefix.length

        // left branch: bit '0'
        prefix.append('0')
        printCodes(tree.left, prefix)
        prefix.setLength(depth)

        // right branch: bit '1'
        prefix.append('1')
        printCodes(tree.right, prefix)
        prefix.setLength(depth)
    }
}



/**
 * Demonstration: counts the characters of a sample sentence, builds the tree
 * and prints the code table.
 */
fun main(args: Array<String>) {
    val test = "this is an example for huffman encoding"

    // The frequency array is sized to the largest character code in the text
    // (256 would be enough for the latin ASCII table).
    val freqs = IntArray(test.max()!!.toInt() + 1)
    for (ch in test) {
        freqs[ch.toInt()] += 1
    }

    val tree = buildTree(freqs)
    println("SYMBOL\tWEIGHT\tHUFFMAN CODE")
    printCodes(tree, StringBuffer())
}

Output:

SYMBOL WEIGHT HUFFMAN CODE d 1 00000 t 1 00001 h 2 0001 s 2 0010 c 1 00110 x 1 00111 m 2 0100 o 2 0101 n 4 011 u 1 10000 l 1 10001 a 3 1001 r 1 10100 g 1 101010 p 1 101011 e 3 1011 i 3 1100 f 3 1101 6 111

Translation of: Lua

This implementation proceeds in three steps: determine word frequencies, construct the Huffman tree, and finally fold the tree into the codes while outputting them.

--- Count the characters of `data` and return a list of leaf nodes
--- `{ word = <char>, freq = <count> }`, sorted by descending frequency so
--- that the lightest nodes sit at the end of the list.
local build_freqtable = function (data)
  local counts = { }
  for pos = 1, #data do
    local ch = data:sub (pos, pos)
    counts [ch] = (counts [ch] or 0) + 1
  end

  local nodes = { }
  for word, count in pairs (counts) do
    table.insert (nodes, { word = word, freq = count })
  end

  --- reverse order!
  table.sort (nodes, function (lhs, rhs) return lhs.freq > rhs.freq end)

  return nodes
end



--- Build the Huffman tree from a list of nodes sorted by descending
--- frequency (as returned by build_freqtable) and return its root.
--- The list is consumed in the process. Internal nodes have the shape
--- `{ freq = ..., left = ..., right = ... }`.
--- With a single node that node is returned unchanged; with an empty list
--- the result is nil.
local build_hufftree = function (nodes)
  --- Fewer than two nodes: there is nothing to merge, and the loop below
  --- would index a nil entry.
  if #nodes < 2 then return nodes [1] end

  while true do
    --- The two lightest nodes are the last two entries.
    local n = #nodes
    local left = nodes [n]
    nodes [n] = nil

    local right = nodes [n - 1]
    nodes [n - 1] = nil

    local new = { freq = left.freq + right.freq, left = left, right = right }

    if n == 2 then return new end

    --- insert new node at correct priority
    --- (the scan stops at the last entry at the latest; the last two entries
    --- are removed together in the next round, so their mutual order does
    --- not affect which nodes get merged)
    local prio = 1
    while prio < #nodes and nodes [prio].freq > new.freq do
      prio = prio + 1
    end
    table.insert (nodes, prio, new)
  end
end



local print_huffcodes do
  --- Append `{ freq, word, bits }` to `acc` for every leaf below `node`;
  --- `bits` is the code accumulated on the way down. Returns `acc`.
  local collect
  collect = function (node, bits, acc)
    if node.word ~= nil then --- leaf
      acc [#acc + 1] = { node.freq, node.word, bits }
      return acc
    end
    collect (node.left, bits .. "0", acc)
    collect (node.right, bits .. "1", acc)
    return acc
  end

  --- Print the code table of the tree rooted at `root`, ordered by
  --- ascending frequency.
  print_huffcodes = function (root)
    local rows = collect (root, "", { })
    table.sort (rows, function (a, b) return a [1] < b [1] end)
    print ("frequency \t word \t huffman code")
    for _, row in ipairs (rows) do
      print (string.format ("%9d \t ‘%s’ \t “%s”", table.unpack (row)))
    end
  end
end





--- Run the three steps on `data`: frequency table, tree, printed code table.
--- Always returns 0.
local huffcode = function (data)
  print_huffcodes (build_hufftree (build_freqtable (data)))
  return 0
end



return huffcode "this is an example for huffman encoding"





frequency word huffman code 1 ‘g’ “01111” 1 ‘p’ “01011” 1 ‘d’ “01100” 1 ‘c’ “01101” 1 ‘t’ “01010” 1 ‘r’ “10000” 1 ‘u’ “11110” 1 ‘x’ “10001” 1 ‘l’ “01110” 2 ‘o’ “11111” 2 ‘m’ “0011” 2 ‘h’ “0010” 2 ‘s’ “0100” 3 ‘i’ “1101” 3 ‘f’ “1110” 3 ‘a’ “1100” 3 ‘e’ “1001” 4 ‘n’ “000” 6 ‘ ’ “101”



\\ Huffman coding of a sample string: builds a priority queue of
\\ (probability, symbol) tuples on a stack object, merges the two front items
\\ until one root remains, walks the tree to fill the encode/decode
\\ inventories, then encodes the string and decodes it again as a check.
Module Huffman {

\\ ordering used by InsertPQ: compares item 0 of two tuples
comp=lambda (a, b) ->{

=array(a, 0)<array(b, 0)

}

\\ inserts n into stack object a; comp decides where n is placed
\\ NOTE(review): the loop looks like a binary search for the insertion
\\ position followed by ShiftBack - confirm against the M2000 manual
module InsertPQ (a, n, &comp) {

if len(a)=0 then stack a {data n} : exit

if comp(n, stackitem(a)) then stack a {push n} : exit

stack a {

push n

t=2: b=len(a)

m=b

While t<=b {

t1=m

m=(b+t) div 2

if m=0 then m=t1 : exit

If comp(stackitem(m),n) then t=m+1: continue

b=m-1

m=b

}

if m>1 then shiftback m

}

}



a$="this is an example for huffman encoding"



\\ count the occurrences of every character of a$
inventory queue freq

For i=1 to len(a$) {

b$=mid$(a$,i,1)

if exist(freq, b$) then Return freq, b$:=freq(b$)+1 : continue

append freq, b$:=1

}

sort ascending freq

b=stack

K=each(freq)

LenA=len(a$)

\\ one (count/length rounded to 4 digits, character) tuple per entry of freq
While k {

InsertPQ b, (Round(Eval(k)/lenA, 4), eval$(k, k^)), &comp

}

\\ merge the two front items until a single item is left
While len(b)>1 {

Stack b {

Read m1, m2

InsertPQ b, (Array(m1)+Array(m2), (m1, m2) ), &comp

}

}

Print "Size of stack object (has only Root):"; len(b)

Print "Root probability:";Round(Array(Stackitem(b)), 3)

inventory encode, decode



Traverse(stackitem(b), "")

\\ encode a$ character by character
message$=""

For i=1 to len(a$)

message$+=encode$(mid$(a$, i, 1))

Next i



Print message$

\\ decode again: at position j look for the code that matches there
j=1

check$=""

For i=1 to len(a$)

d=each(encode)

While d {

code$=eval$(d)

if mid$(message$, j, len(code$))=code$ then {

check$+=decode$(code$)

Print decode$(code$); : j+=len(code$)

}

}

Next i

Print

Print len(message$);" bits ", if$(a$=check$->"Encoding/decoding worked", "Encoding/Decoding failed")





\\ walks the tuple tree; a$ is the code accumulated so far. When item 1 of
\\ the tuple is not an array the tuple is a leaf: its row is printed and it is
\\ added to both inventories. Otherwise item 1 holds the two children.
Sub Traverse(a, a$)

local b=array(a,1)

if type$(b)="mArray" Else {

Print @(10); quote$(array$(a, 1));" "; a$,@(20),array(a)

Append decode, a$ :=array$(a, 1)

Append encode, array$(a, 1):=a$

Exit Sub

}

traverse(array(b), a$+"0")

traverse(array(b,1), a$+"1")

End Sub

}

Huffman



Output:

"p" 00000 0,0256 "l" 00001 0,0256 "t" 00010 0,0256 "r" 00011 0,0256 "x" 00100 0,0256 "u" 00101 0,0256 "s" 0011 0,0513 "o" 0100 0,0513 "m" 0101 0,0513 "n" 011 0,1026 "h" 1000 0,0513 "c" 10010 0,0256 "g" 100110 0,0256 "d" 100111 0,0256 "e" 1010 0,0769 "a" 1011 0,0769 "i" 1100 0,0769 "f" 1101 0,0769 " " 111 0,1538 0001010001100001111111000011111101101111110100010010110101000000000110101111101010000011111100000101110111010101101101111110100111001001001001111100011100110 this is an example for huffman encoding 157 bits Encoding/decoding worked

(* huffman[s] for a string encodes the list of its characters. *)
huffman[s_String] := huffman[Characters[s]];

(* huffman[l] for a list of symbols returns {encoded, rules}: rules maps every
   distinct symbol to its list of 0/1 bits, encoded is l with the rules
   applied and flattened. *)
huffman[l_List] := Module[{merge, structure, rules},



(*merge front two branches. list is assumed to be sorted*)

merge[k_] := Replace[k, {{a_, aC_}, {b_, bC_}, rest___} :> {{{a, b}, aC + bC}, rest}];



(* Start from the {symbol, count} pairs of Tally and alternate "sort by
   count" and "merge" until nothing changes; [[1, 1]] then extracts the
   nested-list tree from the single remaining {tree, total} pair. *)
structure = FixedPoint[

Composition[merge, SortBy[#, Last] &],

Tally[l]][[1, 1]];



(* The position of a symbol inside the nested list, minus 1, is its path
   of 0s and 1s. *)
rules = (# -> Flatten[Position[structure, #] - 1]) & /@ DeleteDuplicates[l];



{Flatten[l /. rules], rules}];

import tables, sequtils

const sampleString = "this is an example for huffman encoding"

type
  # Following range can be changed to produce Huffman codes on arbitrary alphabet (e.g. ternary codes)
  CodeSymbol = range[0..1]
  HuffCode = seq[CodeSymbol]
  Node = ref object
    f: int          # weight of the node
    parent: Node    # nil until the node has been adopted
    case isLeaf: bool
    of true:
      c: char
    else:
      childs: array[CodeSymbol, Node]

proc `<`(a: Node, b: Node): bool =
  # For min operator: nodes are ordered by weight
  a.f < b.f

proc `$`(hc: HuffCode): string =
  # Renders a code as a string of digits
  result = ""
  for symbol in hc:
    result &= $symbol

proc freeChildList(tree: seq[Node], parent: Node = nil): seq[Node] =
  # Constructs a sequence of nodes which can be adopted
  # Optional parent parameter can be set to ensure node will not adopt itself
  result = @[]
  for node in tree:
    if node.parent == nil and node != parent:
      result.add(node)

proc connect(parent: Node, child: Node) =
  # Only call this proc when sure that parent has a free child slot
  child.parent = parent
  parent.f += child.f
  for i in parent.childs.low..parent.childs.high:
    if parent.childs[i] == nil:
      parent.childs[i] = child
      return

proc generateCodes(codes: TableRef[char, HuffCode], currentNode: Node, currentCode: HuffCode = @[]) =
  # Stores the code of every leaf below currentNode in codes; the index of
  # the child slot taken at each step is the code symbol appended
  if currentNode.isLeaf:
    let key = currentNode.c
    codes[key] = currentCode
    return
  for i in currentNode.childs.low..currentNode.childs.high:
    if currentNode.childs[i] != nil:
      let newCode = currentCode & i
      generateCodes(codes, currentNode.childs[i], newCode)

proc buildTree(frequencies: CountTable[char]): seq[Node] =
  # Returns every node of the tree (leaves first, then the internal nodes in
  # creation order); the root is the only node left without a parent
  result = @[]
  # One leaf per counted character, in the table's iteration order
  for key, count in frequencies:
    result.add(Node(f: count, isLeaf: true, c: key))
  while result.freeChildList.len > 1:
    let currentNode = new Node
    result.add(currentNode)
    # Fill every child slot with the lightest node that has no parent yet
    for slot in currentNode.childs.low..currentNode.childs.high:
      currentNode.connect(min(result.freeChildList(currentNode)))
      if result.freeChildList.len <= 1:
        break

var sampleFrequencies = initCountTable[char]()
for c in sampleString:
  sampleFrequencies.inc(c)
let
  tree = buildTree(sampleFrequencies)
  root = tree.freeChildList[0]
var huffCodes = newTable[char, HuffCode]()
generateCodes(huffCodes, root)
echo huffCodes

Output:

{ : 101, a: 1001, c: 01010, d: 01011, e: 1100, f: 1101, g: 01100, h: 11111, i: 1110, l: 01101, m: 0010, n: 000, o: 0011, p: 01110, r: 01111, s: 0100, t: 10000, u: 10001, x: 11110}

Works with: oo2c



MODULE HuffmanEncoding;

IMPORT

Object ,

PriorityQueue ,

Strings ,

Out;

TYPE

(* Leaf of the Huffman tree: carries the character it stands for. *)
Leaf = POINTER TO LeafDesc;

LeafDesc = RECORD

( Object. ObjectDesc )

c : CHAR ;

END ;



(* Internal node: refers to its two subtrees. *)
Inner = POINTER TO InnerDesc;

InnerDesc = RECORD

( Object. ObjectDesc )

left , right : Object. Object ;

END ;



VAR

(* str: text to analyse; f: one counter per character, indexed by the
   character code minus ORD(' '); q: priority queue of subtrees;
   a, b, c: nodes taken from the queue; h: code accumulated while the
   tree is traversed. *)
str : ARRAY 128 OF CHAR ;

i : INTEGER ;

f : ARRAY 96 OF INTEGER ;

q : PriorityQueue. Queue ;

a : PriorityQueue. Node ;

b : PriorityQueue. Node ;

c : PriorityQueue. Node ;

h : ARRAY 64 OF CHAR ;



(* Allocates a leaf for character c and returns it. *)
PROCEDURE NewLeaf ( c : CHAR ) : Leaf;

VAR

x : Leaf;

BEGIN

NEW ( x ) ;x. c := c; RETURN x

END NewLeaf;



(* Allocates an internal node with subtrees l and r and returns it. *)
PROCEDURE NewInner ( l , r : Object. Object ) : Inner;

VAR

x : Inner;

BEGIN

NEW ( x ) ; x. left := l; x. right := r; RETURN x

END NewInner;





(* Prints "char: code" for every leaf below n. x holds the code accumulated
   on the way to n: "0" is appended for the left subtree, "1" for the right
   one, and the appended character is removed again after each visit.
   The code printed for a leaf is x itself, so the procedure no longer
   depends on being called with the global buffer h. *)
PROCEDURE Preorder ( n : Object. Object ; VAR x : ARRAY OF CHAR ) ;
BEGIN
  IF n IS Leaf THEN
    Out. Char ( n ( Leaf ) . c ) ; Out. String ( ": " ) ; Out. String ( x ) ; Out. Ln
  ELSE
    IF n ( Inner ) . left # NIL THEN
      Strings. Append ( "0" , x ) ;
      Preorder ( n ( Inner ) . left , x ) ;
      Strings. Delete ( x , ( Strings. Length ( x ) - 1 ) , 1 )
    END ;
    IF n ( Inner ) . right # NIL THEN
      Strings. Append ( "1" , x ) ;
      Preorder ( n ( Inner ) . right , x ) ;
      Strings. Delete ( x , ( Strings. Length ( x ) - 1 ) , 1 )
    END
  END
END Preorder;



BEGIN
  str := "this is an example for huffman encoding" ;

  (* The counters must start at zero; the language does not define the
     initial value of variables, so clear them explicitly. *)
  i := 0 ;
  WHILE ( i < LEN ( f ) ) DO f [ i ] := 0 ; INC ( i ) END ;

  (* Collect letter frequencies; letters are folded to upper case and
     counted at index "character code minus ORD(' ')". *)
  i := 0 ;
  WHILE str [ i ] # 0X DO INC ( f [ ORD ( CAP ( str [ i ] ) ) - ORD ( ' ' ) ] ) ; INC ( i ) END ;

  (* Create Priority Queue *)
  NEW ( q ) ; q. Clear ( ) ;

  (* Insert one leaf per character that occurs, weighted by its relative
     frequency *)
  i := 0 ;
  WHILE ( i < LEN ( f ) ) DO
    IF f [ i ] # 0 THEN
      q. Insert ( f [ i ] / Strings. Length ( str ) , NewLeaf ( CHR ( i + ORD ( ' ' ) ) ) )
    END ;
    INC ( i )
  END ;

  (* create tree: merge the two front nodes until one is left *)
  WHILE q. Length ( ) > 1 DO
    q. Remove ( a ) ; q. Remove ( b ) ;
    q. Insert ( a. w + b. w , NewInner ( a. d , b. d ) ) ;
  END ;

  (* tree traversal, starting with an empty code *)
  h [ 0 ] := 0X ; q. Remove ( c ) ; Preorder ( c. d , h ) ;

END HuffmanEncoding.



Output:

D: 00000 T: 00001 H: 0001 S: 0010 C: 00110 X: 00111 M: 0100 O: 0101 N: 011 U: 10000 L: 10001 A: 1001 R: 10100 G: 101010 P: 101011 E: 1011 I: 1100 F: 1101 : 111

Translation of: Java

This is not purely Objective-C. It uses Apple's Core Foundation library for its binary heap, which admittedly is very ugly. Thus, this only builds on Mac OS X, not GNUstep.

#import <Foundation/Foundation.h>





// Common base class of leaves and internal nodes: every tree has a frequency.
@interface HuffmanTree : NSObject {
    int freq;
}
@property (nonatomic, readonly) int freq;
- (instancetype)initWithFreq:(int)f;
@end

@implementation HuffmanTree
@synthesize freq; // the frequency of this tree

// Designated initializer: stores the frequency.
- (instancetype)initWithFreq:(int)f {
    self = [super init];
    if (self) {
        freq = f;
    }
    return self;
}
@end





// Retain callback for the binary heap: the __bridge_retained cast hands the
// heap an owning (+1) reference to the object and returns the same pointer.
const void * HuffmanRetain ( CFAllocatorRef allocator, const void * ptr ) {

return ( __bridge_retained const void * ) ( __bridge id ) ptr;

}

// Release callback for the binary heap: the __bridge_transfer cast gives the
// heap's reference back to ARC, which releases it at the end of the statement.
void HuffmanRelease ( CFAllocatorRef allocator, const void * ptr ) {

( void ) ( __bridge_transfer id ) ptr;

}

// Compare callback for the binary heap: orders two HuffmanTree objects by
// their frequency. The third argument is not used.
CFComparisonResult HuffmanCompare(const void *ptr1, const void *ptr2, void *unused) {
    int lhs = ((__bridge HuffmanTree *)ptr1).freq;
    int rhs = ((__bridge HuffmanTree *)ptr2).freq;
    if (lhs < rhs) {
        return kCFCompareLessThan;
    }
    if (lhs > rhs) {
        return kCFCompareGreaterThan;
    }
    return kCFCompareEqualTo;
}





// A leaf of the Huffman tree: one character together with its frequency.
@interface HuffmanLeaf : HuffmanTree {
    char value; // the character this leaf represents
}
@property (readonly) char value;
- (instancetype)initWithFreq:(int)f character:(char)c;
@end

@implementation HuffmanLeaf
@synthesize value;

// Stores the frequency through the superclass, then the character.
- (instancetype)initWithFreq:(int)f character:(char)c {
    self = [super initWithFreq:f];
    if (self) {
        value = c;
    }
    return self;
}
@end





// An internal node of the Huffman tree; its frequency is the sum of the
// frequencies of its two subtrees.
@interface HuffmanNode : HuffmanTree {
    HuffmanTree *left, *right; // subtrees
}
@property (readonly) HuffmanTree *left, *right;
- (instancetype)initWithLeft:(HuffmanTree *)l right:(HuffmanTree *)r;
@end

@implementation HuffmanNode
@synthesize left, right;

// Initializes the node with the combined frequency and keeps both subtrees.
- (instancetype)initWithLeft:(HuffmanTree *)l right:(HuffmanTree *)r {
    self = [super initWithFreq:l.freq + r.freq];
    if (self) {
        left = l;
        right = r;
    }
    return self;
}
@end

@end





// Builds the Huffman tree for the characters in `chars`, a counted set of
// NSNumber-wrapped character codes, and returns its root. A Core Foundation
// binary heap with the Huffman* callbacks serves as the priority queue.
HuffmanTree *buildTree(NSCountedSet *chars) {
    CFBinaryHeapCallBacks callBacks = { 0, HuffmanRetain, HuffmanRelease, NULL, HuffmanCompare };
    CFBinaryHeapRef trees = CFBinaryHeapCreate(NULL, 0, &callBacks, NULL);

    // Start with a forest of leaves, one per character that occurs.
    for (NSNumber *ch in chars) {
        int freq = [chars countForObject:ch];
        if (freq <= 0) {
            continue;
        }
        HuffmanLeaf *leaf = [[HuffmanLeaf alloc] initWithFreq:freq character:(char)[ch intValue]];
        CFBinaryHeapAddValue(trees, (__bridge const void *)leaf);
    }

    NSCAssert(CFBinaryHeapGetCount(trees) > 0, @"String must have at least one character");

    // Merge the two lightest trees until a single one remains.
    while (CFBinaryHeapGetCount(trees) > 1) {
        HuffmanTree *a = (__bridge HuffmanTree *)CFBinaryHeapGetMinimum(trees);
        CFBinaryHeapRemoveMinimumValue(trees);
        HuffmanTree *b = (__bridge HuffmanTree *)CFBinaryHeapGetMinimum(trees);
        CFBinaryHeapRemoveMinimumValue(trees);

        HuffmanNode *merged = [[HuffmanNode alloc] initWithLeft:a right:b];
        CFBinaryHeapAddValue(trees, (__bridge const void *)merged);
    }

    // Fetch the root into a local before the heap itself is released.
    HuffmanTree *result = (__bridge HuffmanTree *)CFBinaryHeapGetMinimum(trees);
    CFRelease(trees);
    return result;
}



// Logs one line per leaf of `tree`: the leaf's character, its frequency and
// the code accumulated on the way down.
//
// tree:   tree to walk; must not be nil (asserted).
// prefix: code built so far. It is extended with "0" for the left child and
//         "1" for the right child, and is back to its incoming contents
//         when the call returns.
void printCodes(HuffmanTree *tree, NSMutableString *prefix) {
    NSCAssert(tree != nil, @"tree must not be nil");

    // Leaf: the accumulated prefix is this character's code.
    if ([tree isKindOfClass:[HuffmanLeaf class]]) {
        HuffmanLeaf *leaf = (HuffmanLeaf *)tree;
        NSLog(@"%c \t %d \t %@", [leaf value], [leaf freq], prefix);
        return;
    }

    // Anything that is neither a leaf nor an internal node is ignored.
    if (![tree isKindOfClass:[HuffmanNode class]]) {
        return;
    }

    HuffmanNode *node = (HuffmanNode *)tree;
    // Position of the digit appended for each child; each recursive call
    // restores the prefix, so the digit is still the last character after it.
    NSUInteger depth = [prefix length];

    // Left branch contributes a "0".
    [prefix appendString:@"0"];
    printCodes([node left], prefix);
    [prefix deleteCharactersInRange:NSMakeRange(depth, 1)];

    // Right branch contributes a "1".
    [prefix appendString:@"1"];
    printCodes([node right], prefix);
    [prefix deleteCharactersInRange:NSMakeRange(depth, 1)];
}



// Demo entry point: counts the characters of a fixed sample string, builds
// the Huffman tree for those counts and logs the resulting code table.
int main(int argc, const char *argv[]) {
    @autoreleasepool {
        NSString *test = @"this is an example for huffman encoding";

        // Tally every character; the counted set keeps one count per
        // distinct boxed character value.
        NSCountedSet *chars = [NSCountedSet new];
        int n = [test length];
        int i = 0;
        while (i < n) {
            unichar ch = [test characterAtIndex:i];
            [chars addObject:@(ch)];
            i++;
        }

        HuffmanTree *tree = buildTree(chars);

        // Header line, then one line per symbol starting from an empty code.
        NSLog(@"SYMBOL \t WEIGHT \t HUFFMAN CODE");
        NSMutableString *code = [NSMutableString string];
        printCodes(tree, code);
    }
    return 0;
}

Output:

SYMBOL WEIGHT HUFFMAN CODE g 1 00000 x 1 00001 m 2 0001 d 1 00100 u 1 00101 t 1 00110 r 1 00111 n 4 010 s 2 0110 o 2 0111 p 1 10000 l 1 10001 a 3 1001 6 101 f 3 1100 e 3 1101 c 1 11100 h 2 11101 i 3 1111

Translation of: Standard ML

We use a Set (which is automatically sorted) as a priority queue.

Works with: OCaml version 4.02+

(** Binary tree whose payloads of type ['a] live only at the leaves.
    [Leaf x] carries one payload; [Node (l, r)] joins two subtrees. *)
type 'a huffman_tree =
  | Leaf of 'a
  | Node of 'a huffman_tree * 'a huffman_tree



(** Ordered set of (frequency, tree) pairs, used as a priority queue:
    the minimum element is the pair with the lowest frequency. *)
module HSet = Set.Make (struct
  (* A tree paired with its frequency. *)
  type t = int * char huffman_tree

  (* The built-in structural comparison orders pairs by their first
     component (the frequency) and falls back to the tree on ties. The tree
     ordering itself is irrelevant for priority; it only keeps different
     trees of equal frequency from comparing equal, since a Set does not
     hold duplicate elements. *)
  let compare (x : t) (y : t) = compare x y
end) ;;



(** [build_tree charFreqs] builds a Huffman tree from a list of
    [(character, frequency)] pairs by repeatedly merging the two minimum
    entries of an [HSet] until a single tree is left.
    @raise Not_found if [charFreqs] is empty. *)
let build_tree charFreqs =
  (* Split [queue] into its minimum element and the remaining set. *)
  let pop_min queue =
    let lowest = HSet.min_elt queue in
    (lowest, HSet.remove lowest queue)
  in
  let rec merge queue =
    let (w1, t1), rest = pop_min queue in
    if HSet.is_empty rest then t1 (* last tree standing is the root *)
    else
      let (w2, t2), rest = pop_min rest in
      (* The merged node weighs as much as both subtrees together. *)
      merge (HSet.add (w1 + w2, Node (t1, t2)) rest)
  in
  charFreqs
  |> List.map (fun (c, f) -> (f, Leaf c))
  |> HSet.of_list
  |> merge



(** [print_tree code tree] prints one line per leaf of [tree] to standard
    output: the leaf's character, a tab, and its binary code.

    [code] is the path from the root to the current subtree, stored in
    reverse (latest branch first) as a list of ["0"] / ["1"] strings; pass
    [[]] for the root. The left branch contributes ["0"], the right ["1"]. *)
let rec print_tree code = function
  | Leaf c ->
    (* The path was accumulated by consing, so reverse it before joining.
       The row terminator is written as an escape so that exactly one
       newline is emitted regardless of the source file's line endings. *)
    Printf.printf "%c\t%s\n" c (String.concat "" (List.rev code))
  | Node (l, r) ->
    print_tree ("0" :: code) l;
    print_tree ("1" :: code) r



(* Demo: count the characters of a sample sentence, build the Huffman tree
   for those counts and print the code table to standard output. *)
let () =
  let str = "this is an example for huffman encoding" in
  (* Character -> number of occurrences seen so far. *)
  let counts = Hashtbl.create 42 in
  String.iter
    (fun c ->
      let seen =
        match Hashtbl.find counts c with
        | n -> n
        | exception Not_found -> 0 (* first occurrence of [c] *)
      in
      Hashtbl.replace counts c (seen + 1))
    str;
  (* Flatten the table into the (character, frequency) list that
     [build_tree] expects. *)
  let charFreqs = Hashtbl.fold (fun c f acc -> (c, f) :: acc) counts [] in
  let tree = build_tree charFreqs in
  (* Header terminated by a single escaped newline. *)
  print_string "Symbol\tHuffman code\n";
  print_tree [] tree



; sample text whose characters are counted below
( define phrase "this is an example for huffman encoding" )



; prepare initial probabilities table:
; fold over the runes of `phrase`, starting from the empty `{ }`, and for
; every rune x store one more than the value currently looked up for x.
; NOTE(review): `( ff x 0 )` presumably yields 0 when x has no entry yet --
; confirm against Ol's finite-function semantics.
; The folded result is then converted to a list; the sorter below compares
; its entries by `cdr`, so they are presumably (rune . count) pairs -- confirm.

( define table ( ff -> list

( fold ( lambda ( ff x )

( put ff x ( + ( ff x 0 ) 1 ) ) )

{ }

( string -> runes phrase ) ) ) )



; just sorter...
; ( resort l ) sorts list l with `<` applied to the `cdr` of each entry,
; i.e. entries with a smaller cdr come first.

( define ( resort l )

( sort ( lambda ( x y ) ( < ( cdr x ) ( cdr y ) ) ) l ) )

; ...to sort table
; rebinds `table` to its sorted version

( define table ( resort table ) )



; build huffman tree

( define tree

( let loop ( ( table table ) )

( if ( null? ( cdr table ) )

( car tabl