source: liacs/dbdm/dbdm_4/reading_frames.py@ 374

Last change on this file since 374 was 63, checked in by Rick van der Zwet, 15 years ago

Finally got all the matching right

  • Property svn:executable set to *
File size: 1.7 KB
Line 
1#!/usr/bin/env python
2#
3# Parse FASTA file and print statistics on reading frames
4# BSDLicence
5# Rick van der Zwet - 0433373 - <info@rickvaderzwet.nl>
6import sys
7
8def _frame_stat(seq, start=0):
9 pdict = {}
10 for n in range(start,len(seq),2):
11 codon = seq[n:n+3]
12 if len(codon) < 3:
13 continue
14 if not pdict.has_key(codon):
15 pdict[codon] = 1
16 else:
17 pdict[codon] += 1
18
19 return(pdict)
20
21def reading_frames(seq):
22 '''Parse from left to right and right to left, at position 1,2,3 in
23 in the so called nucleotide triplets
24 See: http://en.wikipedia.org/wiki/Genetic_code'''
25
26 final = {}
27
28 for start in [0,1,2]:
29 print "Normal; start %i" % (start)
30 retval = _frame_stat(seq,start)
31 for codon,v in retval.iteritems():
32 if not final.has_key(codon):
33 final[codon] = [0,0,0,0,0,0]
34 final[codon][start] += v
35 print "Reverse; start %i" % (start)
36 retval = _frame_stat(seq[::-1],start)
37 for codon,v in retval.iteritems():
38 if not final.has_key(codon):
39 final[codon] = [0,0,0,0,0,0]
40 final[codon][start+3] += v
41
42 print "CODON : N:0 , N:1 , N:2 , R:0 , R:1 , R:2 "
43
44 for codon in sorted(final.keys()):
45 print codon," : ", ",".join(["%6i" % x for x in final[codon]])
46
47
48
49if __name__ == "__main__":
50 # Load data
51 try:
52 handle = open(sys.argv[1],"rU")
53 except IndexError:
54 print "Usage %s <data_file>" % (sys.argv[0])
55 sys.exit(64)
56 except IOError:
57 print "Unable to open '%s'" % (sys.argv[1])
58 sys.exit(64)
59
60 seq = handle.read()
61 handle.close()
62 reading_frames(seq)
Note: See TracBrowser for help on using the repository browser.