Changeset 53


Ignore:
Timestamp:
Dec 20, 2009, 8:56:07 PM (15 years ago)
Author:
Rick van der Zwet
Message:

Temponary commit

File:
1 edited

Legend:

Unmodified
Added
Removed
  • liacs/dbdm/dbdm_4/fasta-hmm.py

    r41 r53  
    44from Bio.Alphabet.IUPAC import ambiguous_dna,unambiguous_dna
    55import Bio.Data.CodonTable
     6from MultiReplace  import MultiReplace
    67
     8def parse_file(file):
     9    handle = open("data/AE005174v2-1.fas", "rU")
     10    for seq_record in SeqIO.parse(handle, "fasta",ambiguous_dna):
     11        # How to translate damm thing into plain nucleic acid codes
     12        # http://en.wikipedia.org/wiki/FASTA_format
     13        stupid = seq_record.seq.__str__()
     14        fasta_translate = {
     15            'r' : 'ga', # purine
     16            'y' : 'tc', # pyrimide
     17            'k' : 'gt', # keto
     18            'm' : 'ac', # amino
     19            's' : 'gc', # strong
     20            'w' : 'at', # weak
     21            'b' : 'gtc',
     22            'd' : 'gat',
     23            'h' : 'act',
     24            'v' : 'gca',
     25            }
     26           
     27        r = MultiReplace(fasta_translate)
     28        stupid = r.replace(stupid)
     29   
     30        pdict = {}
     31        for n in range(1, len(stupid)):
     32            protein = stupid[n]
     33            if not pdict.has_key(protein):
     34                pdict[protein] = 1
     35            else:
     36                pdict[protein] += 1
     37   
     38        print pdict
    739
    8 handle = open("data/AE005174v2-1.fas", "rU")
    9 for seq_record in SeqIO.parse(handle, "fasta",ambiguous_dna):
    10     print seq_record.id
    11     print repr(seq_record.seq)
    12     print seq_record.seq.alphabet
    13     print seq_record.letter_annotations
    14 
    15     # How to translate damm thing into plain nucleic acid codes
    16     # http://en.wikipedia.org/wiki/FASTA_format
    17     stupid = seq_record.seq.to_str().translate({'W' : 'G'})
    18 
    19     pdict = {}
    20     for n in range(1, len(stupid)):
    21         protein = stupid[n]
    22         if not pdict.has_key(protein):
    23             pdict[protein] = 1
    24         else:
    25             pdict[protein] += 1
    26 
    27     print pdict
    28 
    29 
     40file1 = parse_file("data/AE005174v2-1.fas")
     41file2 = parse_file("data/AE005174v2-2.fas")
Note: See TracChangeset for help on using the changeset viewer.