[41] | 1 | #!/usr/bin/env python
|
---|
[55] | 2 | #
|
---|
[59] | 3 | # Parse FASTA file and print statistics on reading frames
|
---|
[55] | 4 | # BSDLicence
|
---|
| 5 | # Rick van der Zwet - 0433373 - <info@rickvaderzwet.nl>
|
---|
[59] | 6 | import sys
|
---|
[41] | 7 |
|
---|
[59] | 8 | def _frame_stat(seq, start=0):
|
---|
[54] | 9 | pdict = {}
|
---|
[59] | 10 | for n in range(start,len(seq),2):
|
---|
| 11 | codon = seq[n:n+3]
|
---|
| 12 | if len(codon) < 3:
|
---|
| 13 | continue
|
---|
| 14 | if not pdict.has_key(codon):
|
---|
| 15 | pdict[codon] = 1
|
---|
[54] | 16 | else:
|
---|
[59] | 17 | pdict[codon] += 1
|
---|
[41] | 18 |
|
---|
[59] | 19 | return(pdict)
|
---|
[54] | 20 |
|
---|
[59] | 21 | def reading_frames(seq):
|
---|
| 22 | '''Parse from left to right and right to left, at position 1,2,3 in
|
---|
| 23 | in the so called nucleotide triplets
|
---|
| 24 | See: http://en.wikipedia.org/wiki/Genetic_code'''
|
---|
[54] | 25 |
|
---|
[59] | 26 | # Populate all empty
|
---|
| 27 | final = {}
|
---|
| 28 | for a in 'acgtn':
|
---|
| 29 | for b in 'acgtn':
|
---|
| 30 | for c in 'acgtn':
|
---|
| 31 | final[a + b + c] = [0,0,0,0,0,0]
|
---|
[54] | 32 |
|
---|
[59] | 33 | for start in [0,1,2]:
|
---|
| 34 | print "Normal; start %i" % (start)
|
---|
| 35 | retval = _frame_stat(seq,start)
|
---|
| 36 | for codon,v in retval.iteritems():
|
---|
| 37 | final[codon][start] += v
|
---|
| 38 | print "Reverse; start %i" % (start)
|
---|
| 39 | retval = _frame_stat(seq[::-1],start)
|
---|
| 40 | for codon,v in retval.iteritems():
|
---|
| 41 | final[codon][start+3] += v
|
---|
[54] | 42 |
|
---|
[59] | 43 | print "CODON : N:0 , N:1 , N:2 , R:0 , R:1 , R:2 "
|
---|
[54] | 44 |
|
---|
[59] | 45 | for codon in sorted(final.keys()):
|
---|
| 46 | print codon," : ", ",".join(["%6i" % x for x in final[codon]])
|
---|
[54] | 47 |
|
---|
| 48 |
|
---|
[55] | 49 |
|
---|
[59] | 50 | if __name__ == "__main__":
|
---|
| 51 | # Load data
|
---|
| 52 | try:
|
---|
| 53 | handle = open(sys.argv[1],"rU")
|
---|
| 54 | except IndexError:
|
---|
| 55 | print "Usage %s <data_file>" % (sys.argv[0])
|
---|
| 56 | sys.exit(64)
|
---|
| 57 | except IOError:
|
---|
| 58 | print "Unable to open '%s'" % (sys.argv[1])
|
---|
| 59 | sys.exit(64)
|
---|
| 60 |
|
---|
| 61 | seq = handle.read()
|
---|
| 62 | handle.close()
|
---|
| 63 | reading_frames(seq)
|
---|