1 | #!/usr/bin/env python
|
---|
2 | #
|
---|
3 | # Parse FASTA file and print statistics on reading frames
|
---|
4 | # BSDLicence
|
---|
5 | # Rick van der Zwet - 0433373 - <info@rickvaderzwet.nl>
|
---|
6 | import sys
|
---|
7 |
|
---|
8 | def _frame_stat(seq, start=0):
|
---|
9 | pdict = {}
|
---|
10 | for n in range(start,len(seq),2):
|
---|
11 | codon = seq[n:n+3]
|
---|
12 | if len(codon) < 3:
|
---|
13 | continue
|
---|
14 | if not pdict.has_key(codon):
|
---|
15 | pdict[codon] = 1
|
---|
16 | else:
|
---|
17 | pdict[codon] += 1
|
---|
18 |
|
---|
19 | return(pdict)
|
---|
20 |
|
---|
21 | def reading_frames(seq):
|
---|
22 | '''Parse from left to right and right to left, at position 1,2,3 in
|
---|
23 | in the so called nucleotide triplets
|
---|
24 | See: http://en.wikipedia.org/wiki/Genetic_code'''
|
---|
25 |
|
---|
26 | # Populate all empty
|
---|
27 | final = {}
|
---|
28 | for a in 'acgtn':
|
---|
29 | for b in 'acgtn':
|
---|
30 | for c in 'acgtn':
|
---|
31 | final[a + b + c] = [0,0,0,0,0,0]
|
---|
32 |
|
---|
33 | for start in [0,1,2]:
|
---|
34 | print "Normal; start %i" % (start)
|
---|
35 | retval = _frame_stat(seq,start)
|
---|
36 | for codon,v in retval.iteritems():
|
---|
37 | final[codon][start] += v
|
---|
38 | print "Reverse; start %i" % (start)
|
---|
39 | retval = _frame_stat(seq[::-1],start)
|
---|
40 | for codon,v in retval.iteritems():
|
---|
41 | final[codon][start+3] += v
|
---|
42 |
|
---|
43 | print "CODON : N:0 , N:1 , N:2 , R:0 , R:1 , R:2 "
|
---|
44 |
|
---|
45 | for codon in sorted(final.keys()):
|
---|
46 | print codon," : ", ",".join(["%6i" % x for x in final[codon]])
|
---|
47 |
|
---|
48 |
|
---|
49 |
|
---|
50 | if __name__ == "__main__":
|
---|
51 | # Load data
|
---|
52 | try:
|
---|
53 | handle = open(sys.argv[1],"rU")
|
---|
54 | except IndexError:
|
---|
55 | print "Usage %s <data_file>" % (sys.argv[0])
|
---|
56 | sys.exit(64)
|
---|
57 | except IOError:
|
---|
58 | print "Unable to open '%s'" % (sys.argv[1])
|
---|
59 | sys.exit(64)
|
---|
60 |
|
---|
61 | seq = handle.read()
|
---|
62 | handle.close()
|
---|
63 | reading_frames(seq)
|
---|