www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

knuc.mlish (1951B)


      1 #lang s-exp "../../mlish.rkt"
      2 (require "../rackunit-typechecking.rkt")
      3 
      4 (require-typed mk-fasta #:from "fasta.mlish")
      5 
      6 ;; Tally every length-`len` substring of `dna`; each count is held in a Ref cell.
      7 (define (all-counts [len : Int] [dna : String] -> (Hash String (Ref Int)))
      8   (let ([counts (hash {String (Ref Int)})])
      9     ;; Walk the start offsets from the last valid one down to 0.
     10     (for ([start (in-range (- (string-length dna) len) -1 -1)])
     11       (let ([kmer (make-string len)])
     12         (string-copy! kmer 0 dna start (+ start len))
     13         (let ([cell (if (hash-has-key? counts kmer)
     14                         (hash-ref counts kmer)
     15                         (let ([fresh (ref 0)]) (hash-set! counts kmer fresh) fresh))])
     16           (:= cell (add1 (deref cell))))))
     17     counts))
     18 
     19 
     20 ;; The ">THREE" section of (mk-fasta 100000), newlines removed and upper-cased.
     21 (define dna
     22   (let* ([fasta (mk-fasta 100000)]
     23          ;; The capture group is the text following the ">THREE ..." header line.
     24          [matched (regexp-match
     25                    (regexp ">THREE Homo sapiens frequency\n(.*)$")
     26                    fasta)]
     27          [body (head (tail matched))])
     28     (let ([out (open-output-string)])
     29       ;; Append every line of the section to out, leaving the newlines behind:
     30       (for ([line (in-lines body)])
     31         (write-string line out))
     32       ;; Read the accumulated text back out of the string port:
     33       (string-upcase (get-output-string out)))))
     34 
     35 (check-type dna : String)
     36 
     37 ;; 1-nucleotide counts (4 distinct keys expected):
     38 (define counts1 (all-counts 1 dna))
     39 
     40 (check-type counts1 : (Hash String (Ref Int)))
     41 
     42 (check-type (hash-count counts1) : Int -> 4)
     43 
     44 ;; 2-nucleotide counts (16 distinct keys expected):
     45 (define counts2 (all-counts 2 dna))
     46 
     47 (check-type counts2 : (Hash String (Ref Int)))
     48 
     49 (check-type (hash-count counts2) : Int -> 16)
     50 
     51 ;; 3-nucleotide counts (64 distinct keys expected):
     52 (define counts3 (all-counts 3 dna))
     53 
     54 (check-type counts3 : (Hash String (Ref Int)))
     55 
     56 (check-type (hash-count counts3) : Int -> 64)
     57 
     58 ;; Occurrence counts of specific sequences (0 when a sequence is absent):
     59 (check-type
     60     (for/list ([probe (in-list (list "GGT" "GGTA" "GGTATT"
     61                                      "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))])
     62       (let ([tally (all-counts (string-length probe) dna)])
     63         (if (hash-has-key? tally probe)
     64             (deref (hash-ref tally probe))
     65             0)))
     66   : (List Int)
     67   -> (list 5861 1776 176 0 0))