www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

huffman.mlish (7175B)


      1 #lang s-exp "../../../mlish.rkt"
      2 (require "../../rackunit-typechecking.rkt")
      3 
      4 ;; Huffman trees from SICP
      5 
      6 ;; =============================================================================
      7 ;; === Sets of Symbols
      8 
      9 (define-type-alias Symbol String)
     10 
     11 ;; Set of strings
     12 (define-type Symbol*
     13   [Empty]
     14   [Singleton String]
     15   [Join String Symbol* Symbol*])
     16 
     17 (define (empty → Symbol*)
     18   Empty)
     19 
     20 (define (singleton [s : String] → Symbol*)
     21   (Singleton s))
     22 
     23 (define (insert [s* : Symbol*] [s1 : String] → Symbol*)
     24   (match s* with
     25    [Empty -> (singleton s1)]
     26    [Singleton s2 ->
     27     (if (string<=? s1 s2)
     28       (if (string=? s1 s2)
     29         s*
     30         (Join s2 (singleton s1) (empty)))
     31       (Join s1 (singleton s2) (empty)))]
     32    [Join s2 l* r* ->
     33     (if (string<=? s1 s2)
     34       (if (string=? s1 s2)
     35         s*
     36         (Join s2 (insert l* s1) r*))
     37       (Join s2 l* (insert r* s1)))]))
     38 
     39 (define (union [s1 : Symbol*] [s2 : Symbol*] → Symbol*)
     40   (match s1 with
     41    [Empty -> s2]
     42    [Singleton s -> (insert s2 s)]
     43    [Join s l* r* -> (union l* (union r* (insert s2 s)))]))
     44 
     45 (define (contains [s* : Symbol*] [s : Symbol] → Bool)
     46   (match s* with
     47    [Empty -> #f]
     48    [Singleton s2 -> (string=? s s2)]
     49    [Join s2 l* r* ->
     50     (if (string<=? s s2)
     51       (if (string=? s s2)
     52         #t
     53         (contains l* s))
     54       (contains r* s))]))
     55 
     56 ;; -----------------------------------------------------------------------------
     57 
     58 (check-type
     59   (insert (empty) "hello")
     60   : Symbol*
     61   ⇒ (singleton "hello"))
     62 
     63 (check-type
     64   (insert (insert (empty) "a") "b")
     65   : Symbol*
     66   ⇒ (Join "b" (singleton "a") (empty)))
     67 
     68 (check-type
     69   (insert (insert (empty) "b") "a")
     70   : Symbol*
     71   ⇒ (Join "b" (singleton "a") (empty)))
     72 
     73 (check-type
     74   (insert (insert (insert (empty) "a") "b") "c")
     75   : Symbol*
     76   ⇒ (Join "b" (singleton "a") (singleton "c")))
     77 
     78 (check-type
     79   (insert (insert (insert (empty) "c") "b") "a")
     80   : Symbol*
     81   ⇒ (Join "c" (Join "b" (singleton "a") (empty)) (empty)))
     82 
     83 (check-type
     84   (union
     85     (insert (insert (insert (empty) "c") "b") "a")
     86     (insert (insert (insert (empty) "a") "b") "c"))
     87   : Symbol*
     88   ⇒ (Join "b" (singleton "a") (singleton "c")))
     89 
     90 ;; -----------------------------------------------------------------------------
     91 
     92 (define-type (List A)
     93   [⊥]
     94   [∷ A (List A)])
     95 
     96 (define-type-alias SymbolList (List Symbol))
     97 
     98 (define (list [x : A] → (List A))
     99   (∷ x ⊥))
    100 
    101 (define (append [x* : (List A)] [y* : (List A)] → (List A))
    102   (match x* with
    103    [⊥ -> y*]
    104    [∷ x x* ->
    105     (∷ x (append x* y*))]))
    106 
    107 (define (length [x* : (List A)] → Int)
    108   (match x* with
    109    [⊥ -> 0]
    110    [∷ x x* -> (+ 1 (length x*))]))
    111 
    112 ;; -----------------------------------------------------------------------------
    113 
    114 (define-type Bit O I)
    115 (define-type-alias Bit* (List Bit))
    116 
    117 ;; -----------------------------------------------------------------------------
    118 
    119 (define-type HTree
    120   [Leaf String Int] ;; Symbol, Weight
    121   [Node HTree HTree Symbol* Int] ;; Left, Right, Symbols, Weight
    122 )
    123 
    124 (define (symbols [h : HTree] → Symbol*)
    125   (match h with
    126    [Leaf s w -> (singleton s)]
    127    [Node lh rh s* w -> s*]))
    128 
    129 (define (weight [h : HTree] → Int)
    130   (match h with
    131    [Leaf s w -> w]
    132    [Node l r s w -> w]))
    133 
    134 (define (make-code-tree [left : HTree] [right : HTree] → HTree)
    135   (Node left right
    136     (union (symbols left) (symbols right))
    137     (+ (weight left) (weight right))))
    138 
    139 (define (decode-aux [bits : Bit*] [root : HTree] [current-branch : HTree] → SymbolList)
    140   (match bits with
    141    [⊥ ->
    142     ⊥]
    143    [∷ b bit* ->
    144     (match (choose-branch b current-branch) with
    145      [Leaf s w ->
    146       (∷ s (decode-aux bit* root root))]
    147      [Node l r s* w ->
    148       (decode-aux bit* root (Node l r s* w))])]))
    149 
    150 (define (decode [bits : Bit*] [tree : HTree] → SymbolList)
    151   (decode-aux bits tree tree))
    152 
    153 (define (choose-branch [bit : Bit] [branch : HTree] → HTree)
    154   (match branch with
    155    [Leaf s w ->
    156     ;; Error
    157     (Leaf "ERROR" 0)]
    158    [Node l* r* s* w ->
    159     (match bit with
    160      [O -> l*]
    161      [I -> r*])]))
    162 
    163 (define-type-alias HTreeSet (List HTree))
    164 
    165 (define (adjoin-set [x : HTree] [set : HTreeSet] → HTreeSet)
    166   (match set with
    167    [⊥ -> (list x)]
    168    [∷ y y* ->
    169     (if (< (weight x) (weight y))
    170       (∷ x set)
    171       (∷ y (adjoin-set x y*)))]))
    172 
    173 (define (make-leaf-set [pair* : (List (× Symbol Int))] → HTreeSet)
    174   (match pair* with
    175    [⊥ -> ⊥]
    176    [∷ pair pair* ->
    177     (match pair with
    178      [s i ->
    179       (adjoin-set (Leaf s i) (make-leaf-set pair*))])]))
    180 
    181 (check-type
    182   (make-leaf-set (∷ (tup "A" 4)
    183                  (∷ (tup "B" 2)
    184                  (∷ (tup "C" 1)
    185                  (∷ (tup "D" 1)
    186                  ⊥)))))
    187   : HTreeSet
    188   ⇒ (∷ (Leaf "D" 1)
    189     (∷ (Leaf "C" 1)
    190     (∷ (Leaf "B" 2)
    191     (∷ (Leaf "A" 4)
    192     ⊥)))))
    193 
    194 (define sample-tree
    195   (make-code-tree
    196     (Leaf "A" 4)
    197     (make-code-tree
    198       (Leaf "B" 2)
    199       (make-code-tree
    200         (Leaf "D" 1)
    201         (Leaf "C" 1)))))
    202 
    203 (define sample-message
    204   (∷ O (∷ I  (∷ I  (∷ O  (∷ O  (∷ I  (∷ O  (∷ I  (∷ O  (∷ I  (∷ I  (∷ I  (∷ I  (∷ O ⊥)))))))))))))))
    205 
    206 (check-type
    207   (decode sample-message sample-tree)
    208   : SymbolList
    209   ⇒  (∷ "A" (∷ "D" (∷ "A" (∷ "B" (∷ "B" (∷ "C" (∷ "B" ⊥))))))))
    210 
    211 (define (encode [message : SymbolList] [tree : HTree] → Bit*)
    212   (match message with
    213    [⊥ -> ⊥]
    214    [∷ m m* ->
    215     (append (encode-symbol m tree) (encode m* tree))]))
    216 
    217 (define (contains-symbol [s : Symbol] [tree : HTree] → Bool)
    218   (contains (symbols tree) s))
    219 
    220 ;; Undefined if symbol is not in tree. Be careful!
    221 (define (encode-symbol [s : Symbol] [tree : HTree] → Bit*)
    222   (match tree with
    223    [Leaf s w -> ⊥]
    224    [Node l* r* s* w ->
    225     (if (contains-symbol s l*)
    226       (∷ O (encode-symbol s l*))
    227       (∷ I (encode-symbol s r*)))]))
    228 
    229 (check-type
    230   (encode (decode sample-message sample-tree) sample-tree)
    231   : Bit*
    232   ⇒ sample-message)
    233 
    234 (define-type-alias Frequency Int)
    235 (define (generate-huffman-tree [pair* : (List (× Symbol Frequency))] → HTree)
    236   (successive-merge (make-leaf-set pair*)))
    237 
    238 (define (successive-merge [tree* : HTreeSet] → HTree)
    239   (match tree* with
    240    [⊥ -> (Leaf "ERROR" 0)]
    241    [∷ t t* ->
    242     (match t* with
    243      [⊥ -> t]
    244      [∷ t2 t* ->
    245       (successive-merge (adjoin-set (make-code-tree t t2) t*))])]))
    246 
    247 (define rock-pair*
    248   (∷ (tup "A" 2)
    249   (∷ (tup "BOOM" 2)
    250   (∷ (tup "GET" 2)
    251   (∷ (tup "JOB" 2)
    252   (∷ (tup "NA" 16)
    253   (∷ (tup "SHA" 3)
    254   (∷ (tup "YIP" 9)
    255   (∷ (tup "WAH" 1)
    256      ⊥)))))))))
    257 
    258 (define rock-tree (generate-huffman-tree rock-pair*))
    259 
    260 (define rock-message
    261   (∷ "GET" (∷ "A" (∷ "JOB"
    262   (∷ "SHA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA"
    263   (∷ "GET" (∷ "A" (∷ "JOB"
    264   (∷ "SHA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA" (∷ "NA"
    265   (∷ "WAH" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP" (∷ "YIP"
    266   (∷ "SHA" (∷ "BOOM" ⊥)))))))))))))))))))))))))))))))))))))
    267 
    268 (define rock-bit* (encode rock-message rock-tree))
    269 
    270 (check-type
    271   (decode rock-bit* rock-tree)
    272   : SymbolList
    273   ⇒ rock-message)
    274 
    275 (check-type
    276   (length rock-bit*)
    277   : Int
    278   ⇒ 84)