Changeset 61
- Timestamp:
- Dec 23, 2009, 1:01:23 PM (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
liacs/dbdm/dbdm_4/ecoli_hmm.py
r60 r61 4 4 # http://en.wikipedia.org/wiki/Escherichia_coli 5 5 # http://en.wikipedia.org/wiki/Open_reading_frame 6 # http://nl.wikipedia.org/wiki/Genetische_code 6 7 7 8 import ghmm 8 9 import sys 10 import csv 11 import string 9 12 10 13 … … 23 26 # The emission probabilities matrix is modeled after the statistics with a 24 27 # total of 1 25 etogene = [ 0. 2, 0.3, 0.3, 0.1, 0.2]28 etogene = [ 0.1, 0.1, 0.1, 0.1, 0.6 ] 26 29 eingene = [ 0.2, 0.3, 0.3, 0.2, 0.1 ] 27 30 … … 39 42 print "Observations : ", ''.join(obs) 40 43 41 # XXX: Training 44 # Start codons 45 # atg, (small chance) gtg 46 # Stop codons 47 # taa, tga 48 # tag 49 # lend = Left End 50 # rend = Right End 51 52 # Training 53 # A -- T, G -- C 54 start_condons = ['atg', 'gtg'] 55 stop_condons = ['taa', 'tga', 'tag'] 56 dna_flip = string.maketrans('atgc','tacg') 57 edl_file = 'data/edl_genes.csv' 58 reader = csv.reader(open(edl_file,"rU")) 59 reader.next() 60 try: 61 for row in reader: 62 (featureType, zNumber, contig, lend, rend, orientation, 63 segmentType, oIslandNumber, geneName, note, function, product, 64 translationNotes) = row 65 lend = int(lend) - 1 66 rend = int(rend) 67 # Make a forward orientation as positive 68 if orientation == '>': 69 order = True 70 else: 71 order = False 72 73 if order: 74 start_codon = seq[lend:lend+3] 75 stop_codon = seq[rend-3:rend] 76 else: 77 start_codon = seq[rend-3:rend].translate(dna_flip)[::-1] 78 stop_codon = seq[lend:lend+3].translate(dna_flip)[::-1] 79 80 print "%6s, %s, %s, %s, %s, %s," % (geneName,lend,rend, start_codon, stop_codon, orientation), 81 82 print "%s, %s" % (start_codon in start_condons, stop_codon in stop_condons) 83 if not start_codon in start_condons or not stop_codon in stop_condons: 84 return 85 except csv.Error, e: 86 sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e)) 87 88 89 90 test_seq=ghmm.EmissionSequence(sigma,list(seq[0:(len(seq) / 3)])) 91 v = m.viterbi(test_seq) 92 #print "fairness of test_seq: ", v 93 42 94 43 # XXX: Validation 95 # Validation 96 val_seq=ghmm.EmissionSequence(sigma,list(seq[10:2000])) 97 v = m.viterbi(val_seq) 98 #print "Test sequence: ", v 44 99 45 100 # XXX: Results
Note:
See TracChangeset
for help on using the changeset viewer.