#!/usr/bin/env python
from Bio import SeqIO,Seq
from Bio import Alphabet
from Bio.Alphabet.IUPAC import ambiguous_dna,unambiguous_dna
import Bio.Data.CodonTable
from MultiReplace  import MultiReplace

def parse_file(file):
    """Count the symbols of every sequence in a FASTA file and print them.

    Each record is read with Biopython's ``SeqIO`` using the ambiguous DNA
    alphabet.  The sequence text is passed through ``MultiReplace`` with the
    ambiguity-code table below (presumably substituting every key with its
    value -- ``MultiReplace`` lives in a project module not shown here), and
    every character of the result is tallied.

    Args:
        file: Path of the FASTA file to read.

    Returns:
        A list holding one ``{symbol: count}`` dict per record, in file
        order.  Each dict is also printed as soon as it is computed.
    """
    # FASTA ambiguity codes and the plain nucleic acid codes they stand for;
    # see http://en.wikipedia.org/wiki/FASTA_format
    # NOTE(review): only lowercase codes are listed -- presumably the input
    # sequences are lowercase; confirm against the data files.
    fasta_translate = {
        'r': 'ga',  # purine
        'y': 'tc',  # pyrimidine
        'k': 'gt',  # keto
        'm': 'ac',  # amino
        's': 'gc',  # strong
        'w': 'at',  # weak
        'b': 'gtc',
        'd': 'gat',
        'h': 'act',
        'v': 'gca',
    }
    # The table never changes, so build the replacer once instead of once
    # per record.
    replacer = MultiReplace(fasta_translate)

    results = []
    # Open the path that was actually requested (it used to be hard-coded)
    # and make sure the handle is closed even if parsing fails.
    with open(file, "rU") as handle:
        for seq_record in SeqIO.parse(handle, "fasta", ambiguous_dna):
            sequence = replacer.replace(str(seq_record.seq))

            # Iterate over the whole string: the previous index loop started
            # at 1 and silently dropped the first symbol of every record.
            counts = {}
            for symbol in sequence:
                counts[symbol] = counts.get(symbol, 0) + 1

            # Single-argument call form prints identically on Python 2 and 3.
            print(counts)
            results.append(counts)

    return results

# Script driver: these calls execute at module level, i.e. as soon as the
# file is run or imported.  parse_file is invoked once per FASTA chunk of the
# AE005174v2 data set and prints its symbol counts as a side effect; whatever
# it returns is kept in file1 / file2.
file1 = parse_file("data/AE005174v2-1.fas")
file2 = parse_file("data/AE005174v2-2.fas")