source: liacs/dbdm/dbdm_4/fasta-hmm.py@ 55

Last change on this file since 55 was 53, checked in by Rick van der Zwet, 15 years ago

Temporary commit

  • Property svn:executable set to *
File size: 1.2 KB
RevLine 
[41]1#!/usr/bin/env python
2from Bio import SeqIO,Seq
3from Bio import Alphabet
4from Bio.Alphabet.IUPAC import ambiguous_dna,unambiguous_dna
5import Bio.Data.CodonTable
[53]6from MultiReplace import MultiReplace
[41]7
def parse_file(file):
    """Print per-record character counts for the FASTA file *file*.

    Every record is read with the ambiguous DNA alphabet, its sequence
    string is passed through MultiReplace using the ambiguity-code table
    below, and a dict mapping each character of the resulting string to
    its number of occurrences is printed (one dict per record).

    file -- path of the FASTA file to read.

    Returns None; the counts are only printed.
    """
    # FASTA/IUPAC ambiguity codes and the plain nucleic acid codes each one
    # can stand for, see http://en.wikipedia.org/wiki/FASTA_format
    # NOTE(review): only lowercase codes are listed -- confirm the input
    # sequences are lowercase, otherwise nothing gets replaced.
    fasta_translate = {
        'r': 'ga',   # purine
        'y': 'tc',   # pyrimidine
        'k': 'gt',   # keto
        'm': 'ac',   # amino
        's': 'gc',   # strong
        'w': 'at',   # weak
        'b': 'gtc',  # not a
        'd': 'gat',  # not c
        'h': 'act',  # not g
        'v': 'gca',  # not t
    }
    # The table never changes, so build the replacer once instead of once
    # per record.
    replacer = MultiReplace(fasta_translate)

    # Open the path that was passed in (previously a hard-coded file name
    # was used and the argument ignored) and make sure the handle is closed.
    with open(file, "rU") as handle:
        for seq_record in SeqIO.parse(handle, "fasta", ambiguous_dna):
            sequence = replacer.replace(str(seq_record.seq))

            # Count every character, starting at index 0; the earlier
            # range(1, len(...)) loop silently skipped the first one.
            counts = {}
            for base in sequence:
                counts[base] = counts.get(base, 0) + 1

            # Single-argument parenthesised print gives the same output
            # under both Python 2 and Python 3.
            print(counts)
[41]39
# Call parse_file once per data file path, in order, and keep each call's
# return value under its own module-level name.
file1, file2 = map(
    parse_file,
    ("data/AE005174v2-1.fas", "data/AE005174v2-2.fas"),
)
Note: See TracBrowser for help on using the repository browser.