Changeset 66
- Timestamp:
- Jan 3, 2010, 8:20:21 PM (15 years ago)
- Location:
- liacs/dbdm/dbdm_4
- Files:
-
- 1 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
liacs/dbdm/dbdm_4/ecoli_hmm.py
r64 r66 29 29 } 30 30 31 # Transform the state to something human readable 31 32 dna_ascii_translate = { 32 '0' : ' *',33 '0' : '.', 33 34 '1' : '<', 34 35 '2' : '<', 35 36 '3' : '<', 36 '4' : ' -',37 '4' : '|', 37 38 '5' : '>', 38 39 '6' : '>', … … 42 43 dna_ascii = MultiReplace(dna_ascii_translate) 43 44 44 def pretty_print(test_seq, ans_seq, v, length= 70, parts=10, seperator=''):45 def pretty_print(test_seq, ans_seq, v, length=100, parts=10, seperator=''): 45 46 """ Pretty printing of output for verification purposes """ 46 47 for i in range(0,len(v[0]),length): … … 54 55 result.append(dna_ascii.replace(''.join(map(str,v[0][t:t+parts])))) 55 56 56 print seperator.join(seq)57 print seperator.join(ans)58 print seperator.join(result)57 print "ORG: " + seperator.join(seq) 58 print "CSV: " + seperator.join(ans) 59 print "HMM: " + seperator.join(result) 59 60 print '' 60 61 print "fairness of test_seq: ", v[1] … … 79 80 # 7) Stop-codon : end of gene - part 3 80 81 A = [ 81 [0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 82 [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0], 83 [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0], 84 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0], 85 [0.0, 0.0, 0.0, 0.0, 0. 7, 0.3, 0.0, 0.0],86 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0], 87 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], 88 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 82 [0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], # 0 83 [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0], # 1 84 [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0], # 2 85 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0], # 3 86 [0.0, 0.0, 0.0, 0.0, 0.9, 0.1, 0.0, 0.0], # 4 87 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0], # 5 88 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], # 6 89 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], # 7 89 90 ] 90 91 … … 94 95 B = [ 95 96 # e.g. state 0 -> emission probability 96 [0.2, 0.2, 0.2, 0.2, 0.2] , 97 [0.9, 0.0, 0.1, 0.0, 0.0] , 98 [0.0, 0.0, 0.0, 1.0, 0.0] , 99 [0.0, 0.0, 1.0, 0.0, 0.0] , 100 [0.2, 0.2, 0.2, 0.2, 0.2] , 101 [0.0, 0.0, 0.0, 1.0, 0.0] , 102 [0.7, 0.0, 0.3, 0.0, 0.0] , 103 [0.7, 0.0, 0.3, 0.0, 0.0] , 97 [0.2, 0.2, 0.2, 0.2, 0.2] , # 0 98 [0.9, 0.0, 0.1, 0.0, 0.0] , # 1 99 [0.0, 0.0, 0.0, 1.0, 0.0] , # 2 100 [0.0, 0.0, 1.0, 0.0, 0.0] , # 3 101 [0.2, 0.2, 0.2, 0.2, 0.2] , # 4 102 [0.0, 0.0, 0.0, 1.0, 0.0] , # 5 103 [0.7, 0.0, 0.3, 0.0, 0.0] , # 6 104 [0.7, 0.0, 0.3, 0.0, 0.0] , # 7 104 105 ] 105 106 … … 109 110 m = ghmm.HMMFromMatrices(sigma,ghmm.DiscreteDistribution(sigma),A ,B, pi) 110 111 print "Initial HMM" 111 print m 112 print m.verboseStr() 112 113 113 114 obs_seq = m.sampleSingle(20) … … 130 131 handle.close() 131 132 132 test_seq = contig_seq['AE005174v2-2'][0:490] 133 ans_seq = answer['AE005174v2-2'][0:490] 133 test_size = 1000; 134 test_seq = contig_seq['AE005174v2-2'][0:test_size] 135 ans_seq = answer['AE005174v2-2'][0:test_size] 134 136 test_eseq=ghmm.EmissionSequence(sigma,list(test_seq)) 135 137 v = m.viterbi(test_eseq) … … 139 141 # Train sequence 140 142 print "Training baumWelch" 141 train_seq = ghmm.EmissionSequence(sigma,list(contig_seq['AE005174v2-1'] ))143 train_seq = ghmm.EmissionSequence(sigma,list(contig_seq['AE005174v2-1'][0:10000])) 142 144 v = m.baumWelch(train_seq) 143 print m 145 print m.verboseStr() 144 146 145 147 -
liacs/dbdm/dbdm_4/gene_detect.py
r64 r66 58 58 handle = open(contig + ".raw","rU") 59 59 contig_seq[contig] = handle.read() 60 abstract_contig[contig] = list(' *' * len(contig_seq[contig]))60 abstract_contig[contig] = list('.' * len(contig_seq[contig])) 61 61 handle.close() 62 62 except IOError: … … 94 94 if start_pos < stop_pos: 95 95 abstract_contig[contig][start_pos:start_pos+3] = ['<'] * 3 96 abstract_contig[contig][start_pos+3:stop_pos] = [' -'] * (stop_pos - (start_pos + 3))96 abstract_contig[contig][start_pos+3:stop_pos] = ['|'] * (stop_pos - (start_pos + 3)) 97 97 abstract_contig[contig][stop_pos:stop_pos+3] = ['>'] * 3 98 98 break
Note:
See TracChangeset
for help on using the changeset viewer.