#!/usr/bin/env python
#
# Parse FASTA file and print statistics on reading frames
# BSDLicence
# Rick van der Zwet - 0433373 -
import sys

# Number of nucleotides in one codon (nucleotide triplet).
_CODON_LENGTH = 3


def _frame_stat(seq, start=0):
    """Count the codons of a single reading frame.

    The frame begins at index ``start`` and is read in consecutive,
    non-overlapping triplets. A trailing fragment shorter than a full
    codon is ignored.

    :param seq: sequence string to scan
    :param start: index of the first codon of the frame
    :return: dict mapping each codon string to its number of occurrences
    """
    pdict = {}
    # Stop early enough that every slice is a full codon; step one codon
    # at a time so the triplets do not overlap.
    for n in range(start, len(seq) - _CODON_LENGTH + 1, _CODON_LENGTH):
        codon = seq[n:n + _CODON_LENGTH]
        pdict[codon] = pdict.get(codon, 0) + 1
    return pdict


def _fasta_sequence(text):
    """Extract the bare sequence from FASTA formatted text.

    Lines starting with ``>`` (record descriptions) are dropped and all
    whitespace, including line breaks, is removed from the remaining
    lines. When the text holds several records their sequences are
    concatenated in file order.

    :param text: full content of a FASTA file
    :return: the sequence as one string without whitespace
    """
    parts = []
    for line in text.splitlines():
        stripped = line.strip()
        if stripped.startswith(">"):
            continue
        # split()/join removes whitespace inside the line as well
        parts.append("".join(stripped.split()))
    return "".join(parts)


def reading_frames(seq):
    """Print and return codon counts for all six reading frames.

    The sequence is parsed from left to right and from right to left
    (the plain reversed string), each time starting at position 0, 1
    and 2 of the nucleotide triplets.
    See: http://en.wikipedia.org/wiki/Genetic_code

    :param seq: sequence string to analyse
    :return: dict mapping each codon to a list of six counts, ordered
             normal start 0, 1, 2 followed by reverse start 0, 1, 2
    """
    final = {}
    # (label, sequence to scan, column offset in the result rows);
    # the reversed copy is built once instead of once per frame.
    directions = (("Normal", seq, 0), ("Reverse", seq[::-1], 3))
    for start in (0, 1, 2):
        for label, strand, offset in directions:
            print("%s; start %i" % (label, start))
            for codon, count in _frame_stat(strand, start).items():
                final.setdefault(codon, [0] * 6)[start + offset] += count

    print("CODON : N:0 , N:1 , N:2 , R:0 , R:1 , R:2 ")
    for codon in sorted(final):
        counts = ",".join(["%6i" % x for x in final[codon]])
        print("%s  :  %s" % (codon, counts))
    return final


def main():
    """Read the FASTA file named on the command line and print its statistics."""
    if len(sys.argv) < 2:
        print("Usage %s " % (sys.argv[0]))
        sys.exit(64)
    try:
        # Plain "r": line endings are normalised by _fasta_sequence, and
        # the "U" mode flag is no longer accepted by recent Python 3.
        handle = open(sys.argv[1], "r")
    except IOError:
        print("Unable to open '%s'" % (sys.argv[1]))
        sys.exit(64)
    with handle:
        text = handle.read()
    reading_frames(_fasta_sequence(text))


if __name__ == "__main__":
    main()