Changeset 53 for liacs/dbdm/dbdm_4
- Timestamp:
- Dec 20, 2009, 8:56:07 PM (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
liacs/dbdm/dbdm_4/fasta-hmm.py
r41 r53 4 4 from Bio.Alphabet.IUPAC import ambiguous_dna,unambiguous_dna 5 5 import Bio.Data.CodonTable 6 from MultiReplace import MultiReplace 6 7 8 def parse_file(file): 9 handle = open("data/AE005174v2-1.fas", "rU") 10 for seq_record in SeqIO.parse(handle, "fasta",ambiguous_dna): 11 # How to translate damm thing into plain nucleic acid codes 12 # http://en.wikipedia.org/wiki/FASTA_format 13 stupid = seq_record.seq.__str__() 14 fasta_translate = { 15 'r' : 'ga', # purine 16 'y' : 'tc', # pyrimide 17 'k' : 'gt', # keto 18 'm' : 'ac', # amino 19 's' : 'gc', # strong 20 'w' : 'at', # weak 21 'b' : 'gtc', 22 'd' : 'gat', 23 'h' : 'act', 24 'v' : 'gca', 25 } 26 27 r = MultiReplace(fasta_translate) 28 stupid = r.replace(stupid) 29 30 pdict = {} 31 for n in range(1, len(stupid)): 32 protein = stupid[n] 33 if not pdict.has_key(protein): 34 pdict[protein] = 1 35 else: 36 pdict[protein] += 1 37 38 print pdict 7 39 8 handle = open("data/AE005174v2-1.fas", "rU") 9 for seq_record in SeqIO.parse(handle, "fasta",ambiguous_dna): 10 print seq_record.id 11 print repr(seq_record.seq) 12 print seq_record.seq.alphabet 13 print seq_record.letter_annotations 14 15 # How to translate damm thing into plain nucleic acid codes 16 # http://en.wikipedia.org/wiki/FASTA_format 17 stupid = seq_record.seq.to_str().translate({'W' : 'G'}) 18 19 pdict = {} 20 for n in range(1, len(stupid)): 21 protein = stupid[n] 22 if not pdict.has_key(protein): 23 pdict[protein] = 1 24 else: 25 pdict[protein] += 1 26 27 print pdict 28 29 40 file1 = parse_file("data/AE005174v2-1.fas") 41 file2 = parse_file("data/AE005174v2-2.fas")
Note:
See TracChangeset
for help on using the changeset viewer.