Context Navigation

← Previous Changeset
Next Changeset →

Changeset 61

Timestamp:

Dec 23, 2009, 1:01:23 PM (15 years ago)

Author:

Rick van der Zwet

Message:

Start hacking to try to understand the so-called 'simple' CSV file :-)

File:

1 edited

liacs/dbdm/dbdm_4/ecoli_hmm.py (modified) (3 diffs)

Legend:

Unmodified
Added
Removed

liacs/dbdm/dbdm_4/ecoli_hmm.py

-              r60
+              r61
 # http://en.wikipedia.org/wiki/Escherichia_coli
 # http://en.wikipedia.org/wiki/Open_reading_frame
+# http://nl.wikipedia.org/wiki/Genetische_code
 import ghmm
 import sys
+import csv
+import string
 …
     # The emission probabilities matrix is modeled after the statistics with a
     # total of 1
     etogene = [ 0.2, 0.3, 0.3, 0.1, 0.2 ]
+    etogene = [ 0.1, 0.1, 0.1, 0.1, 0.6 ]
     eingene = [ 0.2, 0.3, 0.3, 0.2, 0.1 ]
 …
     print "Observations         : ", ''.join(obs)
+    # XXX: Training
+    # Start codons
+    # atg, (small chance) gtg
+    # Stop codons
+    # taa, tga
+    # tag
+    # lend = Left End
+    # rend = Right End
+    # Training
+    # A -- T, G -- C
+    start_condons = ['atg', 'gtg']
+    stop_condons = ['taa', 'tga', 'tag']
+    dna_flip = string.maketrans('atgc','tacg')
+    edl_file = 'data/edl_genes.csv'
+    reader = csv.reader(open(edl_file,"rU"))
+    reader.next()
+    try:
+        for row in reader:
+            (featureType, zNumber, contig, lend, rend, orientation,
+            segmentType, oIslandNumber, geneName, note, function, product,
+            translationNotes) = row
+            lend = int(lend) - 1
+            rend = int(rend)
+            # Make a forward orientation as positive
+            if orientation == '>':
+                order = True
+            else:
+                order = False
+            if order:
+                start_codon = seq[lend:lend+3]
+                stop_codon = seq[rend-3:rend]
+            else:
+                start_codon = seq[rend-3:rend].translate(dna_flip)[::-1]
+                stop_codon = seq[lend:lend+3].translate(dna_flip)[::-1]
+            print "%6s, %s, %s, %s, %s, %s," % (geneName,lend,rend, start_codon, stop_codon, orientation),
+            print "%s, %s" % (start_codon in start_condons, stop_codon in stop_condons)
+            if not start_codon in start_condons or not stop_codon in stop_condons:
+                return
+    except csv.Error, e:
+        sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e))
+    test_seq=ghmm.EmissionSequence(sigma,list(seq[0:(len(seq) / 3)]))
+    v = m.viterbi(test_seq)
+    #print "fairness of test_seq: ", v
+    # XXX: Validation
+    # Validation
+    val_seq=ghmm.EmissionSequence(sigma,list(seq[10:2000]))
+    v = m.viterbi(val_seq)
+    #print "Test sequence: ", v
     # XXX: Results

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 61

Legend:

liacs/dbdm/dbdm_4/ecoli_hmm.py

Download in other formats: