Last change
on this file since 55 was 53, checked in by Rick van der Zwet, 15 years ago |
Temporary commit
|
-
Property svn:executable
set to
*
|
File size:
1.2 KB
|
Rev | Line | |
---|
[41] | 1 | #!/usr/bin/env python
|
---|
| 2 | from Bio import SeqIO,Seq
|
---|
| 3 | from Bio import Alphabet
|
---|
| 4 | from Bio.Alphabet.IUPAC import ambiguous_dna,unambiguous_dna
|
---|
| 5 | import Bio.Data.CodonTable
|
---|
[53] | 6 | from MultiReplace import MultiReplace
|
---|
[41] | 7 |
|
---|
def parse_file(file):
    """Print and return per-record symbol counts for a FASTA file.

    Each record is read as ambiguous DNA, its sequence text is run through
    MultiReplace with the IUPAC ambiguity table below, and every character
    of the resulting text is tallied.

    Args:
        file: Path of the FASTA file to read.

    Returns:
        A list holding one ``{symbol: count}`` dict per record, in file
        order. Each dict is also printed as soon as it is complete.
    """
    # IUPAC ambiguity codes and the plain nucleic acid codes they stand for,
    # see http://en.wikipedia.org/wiki/FASTA_format
    fasta_translate = {
        'r': 'ga',   # purine
        'y': 'tc',   # pyrimidine
        'k': 'gt',   # keto
        'm': 'ac',   # amino
        's': 'gc',   # strong
        'w': 'at',   # weak
        'b': 'gtc',  # anything but a
        'd': 'gat',  # anything but c
        'h': 'act',  # anything but g
        'v': 'gca',  # anything but t
    }
    # NOTE(review): MultiReplace is defined outside this file; presumably it
    # substitutes every key of the table with its value -- confirm.
    # Built once here instead of once per record.
    replacer = MultiReplace(fasta_translate)

    tallies = []
    # Open the path that was actually passed in (it used to be hard-coded),
    # and let the with-block close the handle even if parsing fails.
    # "rU" is kept from the original for universal-newline reading.
    with open(file, "rU") as handle:
        for seq_record in SeqIO.parse(handle, "fasta", ambiguous_dna):
            expanded = replacer.replace(str(seq_record.seq))

            # Walk the text itself so the first symbol (index 0) is counted
            # too and no index list the size of the genome is built.
            counts = {}
            for symbol in expanded:
                counts[symbol] = counts.get(symbol, 0) + 1

            # Single parenthesised argument: prints the same under the
            # Python 2 print statement and the print() function.
            print(counts)
            tallies.append(counts)
    return tallies
|
---|
[41] | 39 |
|
---|
# Run parse_file over both AE005174v2 FASTA files, first -1 then -2, and keep
# whatever each call returns under the original module-level names.
file1, file2 = (
    parse_file("data/AE005174v2-1.fas"),
    parse_file("data/AE005174v2-2.fas"),
)
|
---|
Note:
See
TracBrowser
for help on using the repository browser.