| Trees | Indices | Help |
|---|
|
|
1 import string
2 import re
3 import fssp_rec
4 from Bio.Align import Generic
5 from Bio import Alphabet
6 fff_rec = fssp_rec.fff_rec
7 """
8 A module to handle FSSP files. For now it parses only the header, summary and alignment
9 sections.
10
11 functions: read_fssp(file_handle): reads an fssp file into the records. Returns a
12 tuple of three instances: the header, the summary records and the alignment records.
13 mult_align: returns a Biopython alignment object
14 """
# Keywords that open the individual lines of an FSSP header. Each pattern is
# anchored at the start of the line; the dictionary key is also the name of
# the FSSPHeader attribute that receives the parsed value.
header_records = dict(
    database=re.compile('^DATABASE'),
    pdbid=re.compile('^PDBID'),
    header=re.compile('^HEADER'),
    compnd=re.compile('^COMPND'),
    author=re.compile('^AUTHOR'),
    source=re.compile('^SOURCE'),
    seqlength=re.compile('^SEQLENGTH'),
    nalign=re.compile('^NALIGN'),
)

# Section title lines ("## SUMMARY", "## ALIGNMENTS", "## EQUIVALENCES") and
# the leading shape of the data records found inside the first two sections.
summary_title = re.compile('## +SUMMARY')
alignments_title = re.compile('## +ALIGNMENTS')
equiv_title = re.compile('## +EQUIVALENCES')
summary_rec = re.compile(' *[0-9]+: +[1-9][0-9a-z]{3,3}')
alignments_rec = re.compile(' *[0-9]+ +-{0,1}[0-9]+')
31
class FSSPHeader:
    """Values collected from the header lines of an FSSP file.

    Every attribute starts at the default assigned in __init__ and is
    overwritten by fill_header() when the corresponding header line is seen.
    """

    def __init__(self):
        self.database = None
        self.pdbid = ''
        self.header = ''
        self.compnd = ''
        self.source = ''
        self.author = []
        self.seqlength = 0
        self.nalign = 0

    def fill_header(self, inline):
        """Store the value carried by one header line.

        inline is tested against every pattern in header_records. A line
        that matches none of them is ignored. For a matching line the
        attribute named after the record type is set:

        - database, seqlength, nalign: second whitespace-separated field,
          converted to int
        - compnd, author: list of all fields after the keyword
        - source, header: everything after the first blank, stripped
        - anything else (pdbid): second field, as a string
        """
        for name, pattern in header_records.items():
            if not pattern.match(inline):
                continue
            if name in ('database', 'seqlength', 'nalign'):
                value = int(inline.split()[1])
            elif name in ('compnd', 'author'):
                value = inline.split()[1:]
            elif name in ('source', 'header'):
                # Keep the inner spacing of free text; only the keyword and
                # the surrounding whitespace are removed.
                value = inline[inline.find(' ') + 1:].strip()
            else:
                value = inline.split()[1]
            setattr(self, name, value)
54
class PosAlign:
    """One cell of an FSSP alignment row.

    The cell text is either '..' (a gap) or a residue letter optionally
    followed by a secondary-structure letter.

    Attributes set by the constructor:
        gap -- 1 for a gap cell, 0 otherwise
        aa  -- '-' for a gap, else the residue letter; a residue letter
               that equals its own lower-case form is stored as 'C'
        ss  -- upper-cased second character, or '0' when the cell has
               only one character (not set for gap cells)
    """

    def __init__(self, inStr):
        """Parse one alignment cell.

        Raises ValueError when the stripped text is not 1 or 2 characters.
        """
        inStr = inStr.strip()
        if len(inStr) not in (1, 2):
            raise ValueError('PosAlign: length not 1 or 2 chars: ' + repr(inStr))
        if inStr == '..':
            self.aa = '-'
            self.gap = 1
        else:
            self.gap = 0
            self.aa = inStr[0]
            if self.aa == self.aa.lower():
                self.aa = 'C'
            if len(inStr) == 2:
                self.ss = inStr[1].upper()
            else:
                self.ss = '0'

    def __repr__(self):
        """Return the cell in file notation: '..' or residue + lower-case ss."""
        if self.gap:
            return '..'
        return self.aa + self.ss.lower()

    __str__ = __repr__
81
82
83
84
class FSSPSumRec:
    """ Contains info from an FSSP summary record"""

    def __init__(self, in_str):
        """Split one summary line into its fields.

        The line is split on whitespace. Field 0 is the record number
        followed by one trailing character that is dropped; fields 1 and 2
        are PDB identifiers of 4 characters, optionally followed by a
        one-character chain id ('0' is stored when it is absent). Fields
        3-7 are read as floats, 8-10 as ints, field 11 is kept as text and
        all remaining fields are joined with single blanks into doc, which
        always ends with a newline.

        Raises ValueError when a PDB identifier is not 4 or 5 characters
        long or a numeric field cannot be converted.
        """
        self.raw = in_str
        in_rec = in_str.strip().split()
        self.nr = int(in_rec[0][:-1])
        self.pdb1, self.chain1 = self._split_pdb_id(in_rec[1], 'Bad PDB ID 1')
        self.pdb2, self.chain2 = self._split_pdb_id(in_rec[2], 'Bad PDB ID 2')
        self.zscore = float(in_rec[3])
        self.rmsd = float(in_rec[4])
        self.lali = float(in_rec[5])
        self.lseq2 = float(in_rec[6])
        self.pID = float(in_rec[7])
        self.revers = int(in_rec[8])
        self.permut = int(in_rec[9])
        self.nfrag = int(in_rec[10])
        self.topo = in_rec[11]
        self.doc = ' '.join(in_rec[12:]) + '\n'

    def _split_pdb_id(self, token, error_text):
        """Return (pdb code, chain id) for a 4- or 5-character identifier."""
        if len(token) == 4:
            return token[:4], '0'
        if len(token) == 5:
            return token[:4], token[4]
        raise ValueError(error_text)

    def __repr__(self):
        # NOTE(review): the body of __repr__ was elided from the listing this
        # class was recovered from; returning the stored raw line is an
        # assumption -- confirm against the upstream module.
        return self.raw

    __str__ = __repr__
123
def __init__(self, in_fff_rec):
    """Fill an alignment record from one fixed-format alignment line.

    in_fff_rec is indexed with the field descriptors exposed by
    fssp_rec.align. A chain id of a single blank is stored as '0', and a
    residue name that equals its own lower-case form is stored as 'C'.
    pos_align_dict and PosAlignList start out empty.
    """
    fields = fssp_rec.align
    self.abs_res_num = int(in_fff_rec[fields.abs_res_num])
    self.pdb_res_num = in_fff_rec[fields.pdb_res_num].strip()
    self.chain_id = in_fff_rec[fields.chain_id]
    if self.chain_id == ' ':
        self.chain_id = '0'
    self.res_name = in_fff_rec[fields.res_name]
    if self.res_name == self.res_name.lower():
        self.res_name = 'C'
    self.ss1 = in_fff_rec[fields.ss1]
    self.turn3 = in_fff_rec[fields.turn3]
    self.turn4 = in_fff_rec[fields.turn4]
    self.turn5 = in_fff_rec[fields.turn5]
    self.pos_align_dict = {}
    self.PosAlignList = []
149
150
def __init__(self):
    """Start with empty storage and empty residue-number lookup tables."""
    self.data = {}
    # Both tables map a residue number to the key under which the matching
    # record is stored in this object:
    #   abs_res_dict: absolute residue number -> key
    #   pdb_res_dict: PDB residue number      -> key
    self.abs_res_dict = {}
    self.pdb_res_dict = {}
def build_resnum_list(self):
    """Index every stored record by its absolute and PDB residue number."""
    for key in self.keys():
        record = self[key]
        self.abs_res_dict[record.abs_res_num] = key
        self.pdb_res_dict[record.pdb_res_num] = key
163 # Given an absolute residue number & chain, returns the relevant fssp
164 # record
167 # Given a PDB residue number & chain, returns the relevant fssp
168 # record
171 # Returns a sequence string
172
# NOTE(review): the def line was missing from the listing this method was
# recovered from; the name "sequence" is taken from the upstream module and
# the parameter name from the body -- confirm.
def sequence(self, num):
    """Return the residues of aligned sequence number num as one string.

    Records are visited in ascending absolute residue number and the aa of
    entry num in each record's pos_align_dict is appended.
    """
    return ''.join(
        [self.abs(pos).pos_align_dict[num].aa
         for pos in sorted(self.abs_res_dict)])
180
# NOTE(review): the def line was missing from the listing this method was
# recovered from; the name "fasta_mult_align" is taken from the upstream
# module -- confirm.
def fasta_mult_align(self):
    """Return all aligned sequences as FASTA-like text.

    Each sequence is written as a '> <number>' title line followed by its
    residues, in ascending sequence number. Residues are collected in
    ascending absolute residue number (the same order sequence() uses), so
    the result does not depend on the iteration order of the underlying
    mapping. The set of sequence numbers is taken from the record with the
    lowest absolute residue number; an empty string is returned when no
    residue numbers have been indexed.
    """
    positions = sorted(self.abs_res_dict)
    if not positions:
        return ''
    records = [self.abs(pos) for pos in positions]

    columns = dict((num, []) for num in records[0].pos_align_dict)
    for record in records:
        for num in record.pos_align_dict:
            columns[num].append(record.pos_align_dict[num].aa)

    pieces = []
    for num in sorted(columns):
        pieces.append('> %d\n' % num)
        count = 0
        for residue in ''.join(columns[num]):
            count += 1
            # Kept as in the original: the line break goes in *before*
            # every 72nd residue, so the first line holds 71 residues.
            if count % 72 == 0:
                pieces.append('\n')
            pieces.append(residue)
        pieces.append('\n')
    return ''.join(pieces)
201
204
205 #
206 # Process a fssp file into its constituents. Return a 3-tuple containing
207 # the header, a dictionary of FSSPSumRecs and a dictionary of alignment records.
208 #
def read_fssp(fssp_handle):
    """Parse an FSSP file into header, summary and alignment records.

    fssp_handle is any object with a readline() method that returns an
    empty string at end of file.

    Returns a 3-tuple (header, sum_dict, align_dict):
        header     -- FSSPHeader filled from the lines before the SUMMARY
                      title
        sum_dict   -- FSSPSumDict mapping each summary record number to its
                      FSSPSumRec
        align_dict -- FSSPAlignDict mapping chain id + residue name + PDB
                      residue number to an FSSPAlignRec

    Parsing stops at the EQUIVALENCES title.

    Raises:
        ValueError -- the file ends before a SUMMARY title, or before any
                      ALIGNMENTS / EQUIVALENCES title
        EOFError   -- the file ends after an ALIGNMENTS section was started
                      but before the EQUIVALENCES title
    """
    header = FSSPHeader()
    sum_dict = FSSPSumDict()
    align_dict = FSSPAlignDict()

    # Header: every line before the SUMMARY title.
    curline = fssp_handle.readline()
    while not summary_title.match(curline):
        if not curline:
            # readline() keeps returning '' at EOF; without this check the
            # loop would never terminate on a file that has no summary.
            raise ValueError('Bad FSSP file: no summary record found')
        header.fill_header(curline)
        curline = fssp_handle.readline()

    # Summary: skip the column-title line, then read the records.
    curline = fssp_handle.readline()
    curline = fssp_handle.readline()
    while summary_rec.match(curline):
        cur_sum_rec = FSSPSumRec(curline)
        sum_dict[cur_sum_rec.nr] = cur_sum_rec
        curline = fssp_handle.readline()

    # Alignments: one or more ALIGNMENTS sections, ended by EQUIVALENCES.
    seen_alignments = False
    while True:
        while not (alignments_title.match(curline)
                   or equiv_title.match(curline)):
            if not curline:
                if seen_alignments:
                    raise EOFError(
                        'FSSP file ended before the EQUIVALENCES title')
                raise ValueError(
                    'Bad FSSP file: no alignments title record found')
            curline = fssp_handle.readline()
        if equiv_title.match(curline):
            break

        seen_alignments = True
        # Skip the column-title line, then read the records.
        curline = fssp_handle.readline()
        curline = fssp_handle.readline()
        while alignments_rec.match(curline):
            align_rec = FSSPAlignRec(fff_rec(curline))
            key = (align_rec.chain_id + align_rec.res_name
                   + str(align_rec.pdb_res_num))
            align_list = curline[fssp_rec.align.start_aa_list:].split()
            # The same residue shows up once per ALIGNMENTS section; keep
            # the first record and append the later columns to it.
            if key not in align_dict:
                align_dict[key] = align_rec
            align_dict[key].add_align_list(align_list)
            curline = fssp_handle.readline()
            if not curline:
                raise EOFError(
                    'FSSP file ended inside an ALIGNMENTS section')

    for record in align_dict.values():
        record.pos_align_list2dict()
        del record.PosAlignList
    align_dict.build_resnum_list()
    return (header, sum_dict, align_dict)
264
| Trees | Indices | Help |
|---|
| Generated by Epydoc 3.0.1 on Mon Sep 15 09:24:26 2008 | http://epydoc.sourceforge.net |