source: liacs/dbdm/dbdm_4/reading_frames.py@ 59

Last change on this file since 59 was 59, checked in by Rick van der Zwet, 15 years ago

Do the reading_frames as well

  • Property svn:executable set to *
File size: 1.7 KB
RevLine 
[41]1#!/usr/bin/env python
[55]2#
[59]3# Parse FASTA file and print statistics on reading frames
[55]4# BSDLicence
5# Rick van der Zwet - 0433373 - <info@rickvaderzwet.nl>
[59]6import sys
[41]7
[59]8def _frame_stat(seq, start=0):
[54]9 pdict = {}
[59]10 for n in range(start,len(seq),2):
11 codon = seq[n:n+3]
12 if len(codon) < 3:
13 continue
14 if not pdict.has_key(codon):
15 pdict[codon] = 1
[54]16 else:
[59]17 pdict[codon] += 1
[41]18
[59]19 return(pdict)
[54]20
[59]21def reading_frames(seq):
22 '''Parse from left to right and right to left, at position 1,2,3 in
23 in the so called nucleotide triplets
24 See: http://en.wikipedia.org/wiki/Genetic_code'''
[54]25
[59]26 # Populate all empty
27 final = {}
28 for a in 'acgtn':
29 for b in 'acgtn':
30 for c in 'acgtn':
31 final[a + b + c] = [0,0,0,0,0,0]
[54]32
[59]33 for start in [0,1,2]:
34 print "Normal; start %i" % (start)
35 retval = _frame_stat(seq,start)
36 for codon,v in retval.iteritems():
37 final[codon][start] += v
38 print "Reverse; start %i" % (start)
39 retval = _frame_stat(seq[::-1],start)
40 for codon,v in retval.iteritems():
41 final[codon][start+3] += v
[54]42
[59]43 print "CODON : N:0 , N:1 , N:2 , R:0 , R:1 , R:2 "
[54]44
[59]45 for codon in sorted(final.keys()):
46 print codon," : ", ",".join(["%6i" % x for x in final[codon]])
[54]47
48
[55]49
[59]50if __name__ == "__main__":
51 # Load data
52 try:
53 handle = open(sys.argv[1],"rU")
54 except IndexError:
55 print "Usage %s <data_file>" % (sys.argv[0])
56 sys.exit(64)
57 except IOError:
58 print "Unable to open '%s'" % (sys.argv[1])
59 sys.exit(64)
60
61 seq = handle.read()
62 handle.close()
63 reading_frames(seq)
Note: See TracBrowser for help on using the repository browser.