source: liacs/dbdm/dbdm_4/reading_frames.py@ 60

Last change on this file since 60 was 59, checked in by Rick van der Zwet, 15 years ago

Do the reading_frames as well

  • Property svn:executable set to *
File size: 1.7 KB
Line 
1#!/usr/bin/env python
2#
3# Parse FASTA file and print statistics on reading frames
4# BSDLicence
5# Rick van der Zwet - 0433373 - <info@rickvaderzwet.nl>
6import sys
7
8def _frame_stat(seq, start=0):
9 pdict = {}
10 for n in range(start,len(seq),2):
11 codon = seq[n:n+3]
12 if len(codon) < 3:
13 continue
14 if not pdict.has_key(codon):
15 pdict[codon] = 1
16 else:
17 pdict[codon] += 1
18
19 return(pdict)
20
21def reading_frames(seq):
22 '''Parse from left to right and right to left, at position 1,2,3 in
23 in the so called nucleotide triplets
24 See: http://en.wikipedia.org/wiki/Genetic_code'''
25
26 # Populate all empty
27 final = {}
28 for a in 'acgtn':
29 for b in 'acgtn':
30 for c in 'acgtn':
31 final[a + b + c] = [0,0,0,0,0,0]
32
33 for start in [0,1,2]:
34 print "Normal; start %i" % (start)
35 retval = _frame_stat(seq,start)
36 for codon,v in retval.iteritems():
37 final[codon][start] += v
38 print "Reverse; start %i" % (start)
39 retval = _frame_stat(seq[::-1],start)
40 for codon,v in retval.iteritems():
41 final[codon][start+3] += v
42
43 print "CODON : N:0 , N:1 , N:2 , R:0 , R:1 , R:2 "
44
45 for codon in sorted(final.keys()):
46 print codon," : ", ",".join(["%6i" % x for x in final[codon]])
47
48
49
50if __name__ == "__main__":
51 # Load data
52 try:
53 handle = open(sys.argv[1],"rU")
54 except IndexError:
55 print "Usage %s <data_file>" % (sys.argv[0])
56 sys.exit(64)
57 except IOError:
58 print "Unable to open '%s'" % (sys.argv[1])
59 sys.exit(64)
60
61 seq = handle.read()
62 handle.close()
63 reading_frames(seq)
Note: See TracBrowser for help on using the repository browser.