knuc.mlish (1951B)
#lang s-exp "../../mlish.rkt"
(require "../rackunit-typechecking.rkt")

(require-typed mk-fasta #:from "fasta.mlish")

;; all-counts : Int String -> (Hash String (Ref Int))
;; Builds a table mapping every substring of `dna` that is `len` characters
;; long to a mutable counter holding how many times it occurs. Start
;; positions are visited from the last valid one, (string-length dna) - len,
;; down to 0.
(define (all-counts [len : Int] [dna : String] -> (Hash String (Ref Int)))
  (let ([counts (hash {String (Ref Int)})])
    (for ([start (in-range (- (string-length dna) len) -1 -1)])
      ;; Each window gets a freshly allocated string that is filled from
      ;; dna[start, start + len) and used as the hash key.
      (let ([kmer (make-string len)])
        (string-copy! kmer 0 dna start (+ start len))
        ;; Reuse the counter already stored for this key, or register a
        ;; new zero-initialised one, then bump it by one.
        (let ([cell (if (hash-has-key? counts kmer)
                        (hash-ref counts kmer)
                        (let ([fresh (ref 0)])
                          (hash-set! counts kmer fresh)
                          fresh))])
          (:= cell (add1 (deref cell))))))
    counts))


;; The upper-cased sequence text that follows the ">THREE ..." header in the
;; value produced by (mk-fasta 100000), with its lines concatenated.
(define dna
  (let ([fasta (mk-fasta 100000)])
    ;; Skip to ">THREE ...": keep the second element of the match result
    ;; (presumably the text captured by the "(.*)" group -- confirm against
    ;; the typed regexp-match).
    (let ([three-section
           (head (tail
                  (regexp-match
                   (regexp ">THREE Homo sapiens frequency\n(.*)$")
                   fasta)))])
      (let ([out (open-output-string)])
        ;; Write each line to `out` back to back, so no newlines are kept:
        (for ([line (in-lines three-section)])
          (write-string line out))
        ;; Pull the accumulated text out of `out` and upper-case it:
        (string-upcase (get-output-string out))))))

(check-type dna : String)

;; 1-nucleotide counts:
(define counts1 (all-counts 1 dna))

(check-type counts1 : (Hash String (Ref Int)))

(check-type (hash-count counts1) : Int -> 4)

;; 2-nucleotide counts:
(define counts2 (all-counts 2 dna))

(check-type counts2 : (Hash String (Ref Int)))

(check-type (hash-count counts2) : Int -> 16)

;; 3-nucleotide counts:
(define counts3 (all-counts 3 dna))

(check-type counts3 : (Hash String (Ref Int)))

(check-type (hash-count counts3) : Int -> 64)

;; Specific sequences: for each pattern, build the table of substrings of the
;; pattern's length and report the pattern's count (0 when it never occurs).
(check-type
 (for/list ([pattern (in-list (list "GGT" "GGTA" "GGTATT"
                                    "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))])
   (let ([by-length (all-counts (string-length pattern) dna)])
     (if (hash-has-key? by-length pattern)
         (deref (hash-ref by-length pattern))
         0)))
 : (List Int)
 -> (list 5861 1776 176 0 0))