source: liacs/dbdm/dbdm_4/reading_frames.py@ 299

Last change on this file since 299 was 63, checked in by Rick van der Zwet, 15 years ago

Finally got all the matching right

  • Property svn:executable set to *
File size: 1.7 KB
RevLine 
[41]1#!/usr/bin/env python
[55]2#
[59]3# Parse FASTA file and print statistics on reading frames
[55]4# BSDLicence
5# Rick van der Zwet - 0433373 - <info@rickvaderzwet.nl>
[59]6import sys
[41]7
[59]8def _frame_stat(seq, start=0):
[54]9 pdict = {}
[59]10 for n in range(start,len(seq),2):
11 codon = seq[n:n+3]
12 if len(codon) < 3:
13 continue
14 if not pdict.has_key(codon):
15 pdict[codon] = 1
[54]16 else:
[59]17 pdict[codon] += 1
[41]18
[59]19 return(pdict)
[54]20
[59]21def reading_frames(seq):
22 '''Parse from left to right and right to left, at position 1,2,3 in
23 in the so called nucleotide triplets
24 See: http://en.wikipedia.org/wiki/Genetic_code'''
[54]25
[59]26 final = {}
[54]27
[59]28 for start in [0,1,2]:
29 print "Normal; start %i" % (start)
30 retval = _frame_stat(seq,start)
31 for codon,v in retval.iteritems():
[63]32 if not final.has_key(codon):
33 final[codon] = [0,0,0,0,0,0]
[59]34 final[codon][start] += v
35 print "Reverse; start %i" % (start)
36 retval = _frame_stat(seq[::-1],start)
37 for codon,v in retval.iteritems():
[63]38 if not final.has_key(codon):
39 final[codon] = [0,0,0,0,0,0]
[59]40 final[codon][start+3] += v
[54]41
[59]42 print "CODON : N:0 , N:1 , N:2 , R:0 , R:1 , R:2 "
[54]43
[59]44 for codon in sorted(final.keys()):
45 print codon," : ", ",".join(["%6i" % x for x in final[codon]])
[54]46
47
[55]48
[59]49if __name__ == "__main__":
50 # Load data
51 try:
52 handle = open(sys.argv[1],"rU")
53 except IndexError:
54 print "Usage %s <data_file>" % (sys.argv[0])
55 sys.exit(64)
56 except IOError:
57 print "Unable to open '%s'" % (sys.argv[1])
58 sys.exit(64)
59
60 seq = handle.read()
61 handle.close()
62 reading_frames(seq)
Note: See TracBrowser for help on using the repository browser.