#!/usr/bin/env python from Bio import SeqIO,Seq from Bio import Alphabet from Bio.Alphabet.IUPAC import ambiguous_dna,unambiguous_dna import Bio.Data.CodonTable handle = open("data/AE005174v2-1.fas", "rU") for seq_record in SeqIO.parse(handle, "fasta",ambiguous_dna): print seq_record.id print repr(seq_record.seq) print seq_record.seq.alphabet print seq_record.letter_annotations # How to translate damm thing into plain nucleic acid codes # http://en.wikipedia.org/wiki/FASTA_format stupid = seq_record.seq.to_str().translate({'W' : 'G'}) pdict = {} for n in range(1, len(stupid)): protein = stupid[n] if not pdict.has_key(protein): pdict[protein] = 1 else: pdict[protein] += 1 print pdict