[60] | 1 | #!/usr/bin/env python
|
---|
| 2 |
|
---|
| 3 | # http://en.wikipedia.org/wiki/Stop_codon
|
---|
| 4 | # http://en.wikipedia.org/wiki/Escherichia_coli
|
---|
| 5 | # http://en.wikipedia.org/wiki/Open_reading_frame
|
---|
| 6 |
|
---|
| 7 | import ghmm
|
---|
| 8 | import sys
|
---|
| 9 |
|
---|
| 10 |
|
---|
| 11 | def ecoli_hmm(seq):
|
---|
| 12 | """Try to find genes inside e sequence using a HMM"""
|
---|
| 13 | # Model 4 bases A C G T and unknown state N
|
---|
| 14 | sigma = ghmm.Alphabet(['a', 'c', 'g', 't', 'n' ])
|
---|
| 15 | print sigma
|
---|
| 16 |
|
---|
| 17 | # XXX: Proper values, based of statistics
|
---|
| 18 | # The transition matrix A is chosen such that it reflects the statistics
|
---|
| 19 | # Part one will try to get us into a gene, part two will tell us when to
|
---|
| 20 | # get out of it
|
---|
| 21 | A = [[0.6,0.4], [0.3, 0.7]]
|
---|
| 22 |
|
---|
| 23 | # The emission probabilities matrix is modeled after the statistics with a
|
---|
| 24 | # total of 1
|
---|
| 25 | etogene = [ 0.2, 0.3, 0.3, 0.1, 0.2 ]
|
---|
| 26 | eingene = [ 0.2, 0.3, 0.3, 0.2, 0.1 ]
|
---|
| 27 |
|
---|
| 28 | B = [etogene,eingene]
|
---|
| 29 |
|
---|
| 30 | # Initial distribution favors outside
|
---|
| 31 | pi = [0.9, 0.1]
|
---|
| 32 |
|
---|
| 33 | m = ghmm.HMMFromMatrices(sigma,ghmm.DiscreteDistribution(sigma),A ,B, pi)
|
---|
| 34 | print "Initial HMM", m
|
---|
| 35 |
|
---|
| 36 | obs_seq = m.sampleSingle(20)
|
---|
| 37 | print "Observation sequence : ", obs_seq
|
---|
| 38 | obs = map(sigma.external, obs_seq)
|
---|
| 39 | print "Observations : ", ''.join(obs)
|
---|
| 40 |
|
---|
| 41 | # XXX: Training
|
---|
| 42 |
|
---|
| 43 | # XXX: Validation
|
---|
| 44 |
|
---|
| 45 | # XXX: Results
|
---|
| 46 |
|
---|
| 47 |
|
---|
| 48 |
|
---|
| 49 | if __name__ == "__main__":
|
---|
| 50 | # Load data
|
---|
| 51 | try:
|
---|
| 52 | handle = open(sys.argv[1],"rU")
|
---|
| 53 | except IndexError:
|
---|
| 54 | print "Usage %s <data_file>" % (sys.argv[0])
|
---|
| 55 | sys.exit(64)
|
---|
| 56 | except IOError:
|
---|
| 57 | print "Unable to open '%s'" % (sys.argv[1])
|
---|
| 58 | sys.exit(64)
|
---|
| 59 |
|
---|
| 60 | seq = handle.read()
|
---|
| 61 | handle.close()
|
---|
| 62 | ecoli_hmm(seq)
|
---|