commit 05df0337906560e203a8cffdc5eefc925d96de79
parent 04e1cb701f48484298ed5b1149bb13cd730f9dea
Author: ben <benjaminlgreenman@gmail.com>
Date: Wed, 23 Mar 2016 00:06:33 -0400
[bg] huffman trees
Diffstat:
3 files changed, 311 insertions(+), 0 deletions(-)
diff --git a/tapl/mlish.rkt b/tapl/mlish.rkt
@@ -540,6 +540,7 @@
(define-primop string : (→ Char String))
(define-primop sleep : (→ Int Unit))
(define-primop string=? : (→ String String Bool))
+(define-primop string<=? : (→ String String Bool))
(define-typed-syntax string-append
[(_ . strs)
diff --git a/tapl/tests/mlish/bg/README.md b/tapl/tests/mlish/bg/README.md
@@ -45,4 +45,36 @@ mlish tests by Ben
(map-cps [f : (→ A B)] [x* : (List A)] → (List B))
```
+`huffman`
+---
+```
+(empty → Symbol*)
+(singleton [s : String] → Symbol*)
+(insert [s* : Symbol*] [s1 : String] → Symbol*)
+(union [s1 : Symbol*] [s2 : Symbol*] → Symbol*)
+(contains [s* : Symbol*] [s : Symbol] → Bool)
+(list [x : A] → (List A))
+(append [x* : (List A)] [y* : (List A)] → (List A))
+(length [x* : (List A)] → Int)
+(symbols [h : HTree] → Symbol*)
+(weight [h : HTree] → Int)
+(make-code-tree [left : HTree] [right : HTree] → HTree)
+(decode-aux [bits : Bit*] [root : HTree] [current-branch : HTree] → SymbolList)
+(decode [bits : Bit*] [tree : HTree] → SymbolList)
+(choose-branch [bit : Bit] [branch : HTree] → HTree)
+(adjoin-set [x : HTree] [set : HTreeSet] → HTreeSet)
+(make-leaf-set [pair* : (List (× Symbol Int))] → HTreeSet)
+sample-tree
+sample-message
+(encode [message : SymbolList] [tree : HTree] → Bit*)
+(contains-symbol [s : Symbol] [tree : HTree] → Bool)
+(encode-symbol [s : Symbol] [tree : HTree] → Bit*)
+(generate-huffman-tree [pair* : (List (× Symbol Frequency))] → HTree)
+(successive-merge [tree* : HTreeSet] → HTree)
+rock-pair*
+rock-tree (generate-huffman-tree rock-pair*))
+rock-message
+rock-bit* (encode rock-message rock-tree))
+```
+
diff --git a/tapl/tests/mlish/bg/huffman.mlish b/tapl/tests/mlish/bg/huffman.mlish
@@ -0,0 +1,278 @@
+#lang s-exp "../../../mlish.rkt"
+(require "../../rackunit-typechecking.rkt")
+
+;; Huffman trees from SICP
+
+;; =============================================================================
+;; === Sets of Symbols
+
+(define-type-alias Symbol String)
+
+;; Set of strings
+(define-type Symbol*
+ [Empty]
+ [Singleton String]
+ [Join String Symbol* Symbol*])
+
+(define (empty → Symbol*)
+ Empty)
+
+(define (singleton [s : String] → Symbol*)
+ (Singleton s))
+
+(define (insert [s* : Symbol*] [s1 : String] → Symbol*)
+ (match s* with
+ [Empty -> (singleton s1)]
+ [Singleton s2 ->
+ (if (string<=? s1 s2)
+ (if (string=? s1 s2)
+ s*
+ (Join s2 (singleton s1) (empty)))
+ (Join s1 (singleton s2) (empty)))]
+ [Join s2 l* r* ->
+ (if (string<=? s1 s2)
+ (if (string=? s1 s2)
+ s*
+ (Join s2 (insert l* s1) r*))
+ (Join s2 l* (insert r* s1)))]))
+
+(define (union [s1 : Symbol*] [s2 : Symbol*] → Symbol*)
+ (match s1 with
+ [Empty -> s2]
+ [Singleton s -> (insert s2 s)]
+ [Join s l* r* -> (union l* (union r* (insert s2 s)))]))
+
+(define (contains [s* : Symbol*] [s : Symbol] → Bool)
+ (match s* with
+ [Empty -> #f]
+ [Singleton s2 -> (string=? s s2)]
+ [Join s2 l* r* ->
+ (if (string<=? s s2)
+ (if (string=? s s2)
+ #t
+ (contains l* s))
+ (contains r* s))]))
+
+;; -----------------------------------------------------------------------------
+
+(check-type
+ (insert (empty) "hello")
+ : Symbol*
+ ⇒ (singleton "hello"))
+
+(check-type
+ (insert (insert (empty) "a") "b")
+ : Symbol*
+ ⇒ (Join "b" (singleton "a") (empty)))
+
+(check-type
+ (insert (insert (empty) "b") "a")
+ : Symbol*
+ ⇒ (Join "b" (singleton "a") (empty)))
+
+(check-type
+ (insert (insert (insert (empty) "a") "b") "c")
+ : Symbol*
+ ⇒ (Join "b" (singleton "a") (singleton "c")))
+
+(check-type
+ (insert (insert (insert (empty) "c") "b") "a")
+ : Symbol*
+ ⇒ (Join "c" (Join "b" (singleton "a") (empty)) (empty)))
+
+(check-type
+ (union
+ (insert (insert (insert (empty) "c") "b") "a")
+ (insert (insert (insert (empty) "a") "b") "c"))
+ : Symbol*
+ ⇒ (Join "b" (singleton "a") (singleton "c")))
+
+;; -----------------------------------------------------------------------------
+
+(define-type (List A)
+ [⊥]
+ [∷ A (List A)])
+
+(define-type-alias SymbolList (List Symbol))
+
+(define (list [x : A] → (List A))
+ (∷ x ⊥))
+
+(define (append [x* : (List A)] [y* : (List A)] → (List A))
+ (match x* with
+ [⊥ -> y*]
+ [∷ x x* ->
+ (∷ x (append x* y*))]))
+
+(define (length [x* : (List A)] → Int)
+ (match x* with
+ [⊥ -> 0]
+ [∷ x x* -> (+ 1 (length x*))]))
+
+;; -----------------------------------------------------------------------------
+
+(define-type Bit O I)
+(define-type-alias Bit* (List Bit))
+
+;; -----------------------------------------------------------------------------
+
+(define-type HTree
+ [Leaf String Int] ;; Symbol, Weight
+ [Node HTree HTree Symbol* Int] ;; Left, Right, Symbols, Weight
+)
+
+(define (symbols [h : HTree] → Symbol*)
+ (match h with
+ [Leaf s w -> (singleton s)]
+ [Node lh rh s* w -> s*]))
+
+(define (weight [h : HTree] → Int)
+ (match h with
+ [Leaf s w -> w]
+ [Node l r s w -> w]))
+
+(define (make-code-tree [left : HTree] [right : HTree] → HTree)
+ (Node left right
+ (union (symbols left) (symbols right))
+ (+ (weight left) (weight right))))
+
+(define (decode-aux [bits : Bit*] [root : HTree] [current-branch : HTree] → SymbolList)
+ (match bits with
+ [⊥ ->
+ ⊥]
+ [∷ b bit* ->
+ (match (choose-branch b current-branch) with
+ [Leaf s w ->
+ (∷ s (decode-aux bit* root root))]
+ [Node l r s* w ->
+ (decode-aux bit* root (Node l r s* w))])]))
+
+(define (decode [bits : Bit*] [tree : HTree] → SymbolList)
+ (decode-aux bits tree tree))
+
+(define (choose-branch [bit : Bit] [branch : HTree] → HTree)
+ (match branch with
+ [Leaf s w ->
+ ;; Error
+ (Leaf "ERROR" 0)]
+ [Node l* r* s* w ->
+ (match bit with
+ [O -> l*]
+ [I -> r*])]))
+
+(define-type-alias HTreeSet (List HTree))
+
+(define (adjoin-set [x : HTree] [set : HTreeSet] → HTreeSet)
+ (match set with
+ [⊥ -> (list x)]
+ [∷ y y* ->
+ (if (< (weight x) (weight y))
+ (∷ x set)
+ (∷ y (adjoin-set x y*)))]))
+
+(define (make-leaf-set [pair* : (List (× Symbol Int))] → HTreeSet)
+ (match pair* with
+ [⊥ -> ⊥]
+ [∷ pair pair* ->
+ (match pair with
+ [s i ->
+ (adjoin-set (Leaf s i) (make-leaf-set pair*))])]))
+
+(check-type
+ (make-leaf-set (∷ (tup "A" 4)
+ (∷ (tup "B" 2)
+ (∷ (tup "C" 1)
+ (∷ (tup "D" 1)
+ ⊥)))))
+ : HTreeSet
+ ⇒ (∷ (Leaf "D" 1)
+ (∷ (Leaf "C" 1)
+ (∷ (Leaf "B" 2)
+ (∷ (Leaf "A" 4)
+ ⊥)))))
+
+(define sample-tree
+ (make-code-tree
+ (Leaf "A" 4)
+ (make-code-tree
+ (Leaf "B" 2)
+ (make-code-tree
+ (Leaf "D" 1)
+ (Leaf "C" 1)))))
+
+(define sample-message
+ (∷ O (∷ I (∷ I (∷ O (∷ O (∷ I (∷ O (∷ I (∷ O (∷ I (∷ I (∷ I (∷ I (∷ O ⊥)))))))))))))))
+
+(check-type
+ (decode sample-message sample-tree)
+ : SymbolList
+ ⇒ (∷ "A" (∷ "D" (∷ "A" (∷ "B" (∷ "B" (∷ "C" (∷ "B" ⊥))))))))
+
+(define (encode [message : SymbolList] [tree : HTree] → Bit*)
+ (match message with
+ [⊥ -> ⊥]
+ [∷ m m* ->
+ (append (encode-symbol m tree) (encode m* tree))]))
+
+(define (contains-symbol [s : Symbol] [tree : HTree] → Bool)
+ (contains (symbols tree) s))
+
+;; Undefined if symbol is not in tree. Be careful!
+(define (encode-symbol [s : Symbol] [tree : HTree] → Bit*)
+ (match tree with
+ [Leaf s w -> ⊥]
+ [Node l* r* s* w ->
+ (if (contains-symbol s l*)
+ (∷ O (encode-symbol s l*))
+ (∷ I (encode-symbol s r*)))]))
+
+(check-type
+ (encode (decode sample-message sample-tree) sample-tree)
+ : Bit*
+ ⇒ sample-message)
+
+(define-type-alias Frequency Int)
+(define (generate-huffman-tree [pair* : (List (× Symbol Frequency))] → HTree)
+ (successive-merge (make-leaf-set pair*)))
+
+(define (successive-merge [tree* : HTreeSet] → HTree)
+ (match tree* with
+ [⊥ -> (Leaf "ERROR" 0)]
+ [∷ t t* ->
+ (match t* with
+ [⊥ -> t]
+ [∷ t2 t* ->
+ (successive-merge (adjoin-set (make-code-tree t t2) t*))])]))
+
+(define rock-pair*
+ (∷ (tup "A" 2)
+ (∷ (tup "BOOM" 2)
+ (∷ (tup "GET" 2)
+ (∷ (tup "JOB" 2)
+ (∷ (tup "NA" 16)
+ (∷ (tup "SHA" 3)
+ (∷ (tup "YIP" 9)
+ (∷ (tup "WAH" 1)
+ ⊥)))))))))
+
+(define rock-tree (generate-huffman-tree rock-pair*))
+
+(define rock-message
+ (∷ "GET" (∷ "A" (∷ "JOB"
+ (∷ "SHA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA"
+ (∷ "GET" (∷ "A" (∷ "JOB"
+ (∷ "SHA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA"
+ (∷ "WAH" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP"
+ (∷ "SHA" (∷ "BOOM" ⊥)))))))))))))))))))))))))))))))))))))
+
+(define rock-bit* (encode rock-message rock-tree))
+
+(check-type
+ (decode rock-bit* rock-tree)
+ : SymbolList
+ ⇒ rock-message)
+
+(check-type
+ (length rock-bit*)
+ : Int
+ ⇒ 84)