[41] | 1 | #!/usr/bin/env python
|
---|
[55] | 2 | #
|
---|
[59] | 3 | # Parse FASTA file and print statistics on reading frames
|
---|
[55] | 4 | # BSDLicence
|
---|
| 5 | # Rick van der Zwet - 0433373 - <info@rickvaderzwet.nl>
|
---|
[59] | 6 | import sys
|
---|
[41] | 7 |
|
---|
[59] | 8 | def _frame_stat(seq, start=0):
|
---|
[54] | 9 | pdict = {}
|
---|
[59] | 10 | for n in range(start,len(seq),2):
|
---|
| 11 | codon = seq[n:n+3]
|
---|
| 12 | if len(codon) < 3:
|
---|
| 13 | continue
|
---|
| 14 | if not pdict.has_key(codon):
|
---|
| 15 | pdict[codon] = 1
|
---|
[54] | 16 | else:
|
---|
[59] | 17 | pdict[codon] += 1
|
---|
[41] | 18 |
|
---|
[59] | 19 | return(pdict)
|
---|
[54] | 20 |
|
---|
[59] | 21 | def reading_frames(seq):
|
---|
| 22 | '''Parse from left to right and right to left, at position 1,2,3 in
|
---|
| 23 | in the so called nucleotide triplets
|
---|
| 24 | See: http://en.wikipedia.org/wiki/Genetic_code'''
|
---|
[54] | 25 |
|
---|
[59] | 26 | final = {}
|
---|
[54] | 27 |
|
---|
[59] | 28 | for start in [0,1,2]:
|
---|
| 29 | print "Normal; start %i" % (start)
|
---|
| 30 | retval = _frame_stat(seq,start)
|
---|
| 31 | for codon,v in retval.iteritems():
|
---|
[63] | 32 | if not final.has_key(codon):
|
---|
| 33 | final[codon] = [0,0,0,0,0,0]
|
---|
[59] | 34 | final[codon][start] += v
|
---|
| 35 | print "Reverse; start %i" % (start)
|
---|
| 36 | retval = _frame_stat(seq[::-1],start)
|
---|
| 37 | for codon,v in retval.iteritems():
|
---|
[63] | 38 | if not final.has_key(codon):
|
---|
| 39 | final[codon] = [0,0,0,0,0,0]
|
---|
[59] | 40 | final[codon][start+3] += v
|
---|
[54] | 41 |
|
---|
[59] | 42 | print "CODON : N:0 , N:1 , N:2 , R:0 , R:1 , R:2 "
|
---|
[54] | 43 |
|
---|
[59] | 44 | for codon in sorted(final.keys()):
|
---|
| 45 | print codon," : ", ",".join(["%6i" % x for x in final[codon]])
|
---|
[54] | 46 |
|
---|
| 47 |
|
---|
[55] | 48 |
|
---|
[59] | 49 | if __name__ == "__main__":
|
---|
| 50 | # Load data
|
---|
| 51 | try:
|
---|
| 52 | handle = open(sys.argv[1],"rU")
|
---|
| 53 | except IndexError:
|
---|
| 54 | print "Usage %s <data_file>" % (sys.argv[0])
|
---|
| 55 | sys.exit(64)
|
---|
| 56 | except IOError:
|
---|
| 57 | print "Unable to open '%s'" % (sys.argv[1])
|
---|
| 58 | sys.exit(64)
|
---|
| 59 |
|
---|
| 60 | seq = handle.read()
|
---|
| 61 | handle.close()
|
---|
| 62 | reading_frames(seq)
|
---|