update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,8 @@ import re
|
|
10 |
from UniprotKB_P_Sequence_RCSB_API_test import ProteinQuery, ProteinSearchEngine
|
11 |
import plotly.graph_objects as go
|
12 |
from shinywidgets import output_widget, render_widget
|
|
|
|
|
13 |
warnings.filterwarnings('ignore')
|
14 |
|
15 |
# Load environment variables from .env file
|
@@ -31,31 +33,31 @@ class PDBSearchAssistant:
|
|
31 |
)
|
32 |
|
33 |
self.prompt_template = """
|
34 |
-
Extract specific search parameters from the query
|
35 |
-
1.
|
36 |
-
2.
|
37 |
-
3.
|
38 |
-
4.
|
|
|
|
|
39 |
|
40 |
Format:
|
|
|
|
|
41 |
Resolution: [maximum resolution in ร
, if mentioned]
|
42 |
Sequence: [any sequence mentioned]
|
43 |
PDB_ID: [specific PDB ID if mentioned]
|
44 |
Method: [experimental method if mentioned]
|
45 |
|
46 |
Examples:
|
47 |
-
Query: "Find X-ray
|
|
|
|
|
48 |
Resolution: 2.5
|
49 |
Sequence: none
|
50 |
PDB_ID: none
|
51 |
Method: X-RAY
|
52 |
|
53 |
-
Query: "Show me NMR structures of kinases"
|
54 |
-
Resolution: none
|
55 |
-
Sequence: none
|
56 |
-
PDB_ID: none
|
57 |
-
Method: NMR
|
58 |
-
|
59 |
Now analyze:
|
60 |
Query: {query}
|
61 |
"""
|
@@ -230,30 +232,60 @@ class PDBSearchAssistant:
|
|
230 |
session = final_query.exec()
|
231 |
results = []
|
232 |
|
233 |
-
# Process results
|
|
|
|
|
234 |
try:
|
235 |
for entry in session:
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
result = {
|
244 |
-
'PDB ID':
|
|
|
|
|
|
|
|
|
245 |
}
|
246 |
-
|
247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
except Exception as e:
|
249 |
print(f"Error processing results: {str(e)}")
|
250 |
-
|
251 |
-
|
252 |
-
results.append({'PDB ID': entry})
|
253 |
-
|
254 |
print(f"Found {len(results)} structures")
|
255 |
return results
|
256 |
-
|
257 |
return []
|
258 |
|
259 |
except Exception as e:
|
@@ -261,52 +293,180 @@ class PDBSearchAssistant:
|
|
261 |
print(f"Error type: {type(e)}")
|
262 |
return []
|
263 |
|
264 |
-
def
|
265 |
-
|
266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
267 |
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
|
273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
|
283 |
-
return answer
|
284 |
|
285 |
def create_interactive_table(df):
|
286 |
if df.empty:
|
287 |
return go.Figure()
|
288 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
289 |
# Create interactive table
|
290 |
table = go.Figure(data=[go.Table(
|
291 |
header=dict(
|
292 |
values=list(df.columns),
|
293 |
fill_color='paleturquoise',
|
294 |
-
align='
|
295 |
-
font=dict(size=
|
296 |
),
|
297 |
cells=dict(
|
298 |
-
values=[
|
299 |
-
|
300 |
-
|
301 |
-
|
|
|
|
|
|
|
|
|
|
|
302 |
),
|
303 |
-
columnwidth=[
|
|
|
|
|
|
|
304 |
)])
|
305 |
|
306 |
# Update table layout
|
307 |
table.update_layout(
|
308 |
-
margin=dict(l=
|
309 |
-
height=
|
310 |
autosize=True
|
311 |
)
|
312 |
|
@@ -316,6 +476,11 @@ def create_interactive_table(df):
|
|
316 |
app_ui = ui.page_fluid(
|
317 |
ui.tags.head(
|
318 |
ui.tags.style("""
|
|
|
|
|
|
|
|
|
|
|
319 |
.table a {
|
320 |
color: #0d6efd;
|
321 |
text-decoration: none;
|
@@ -324,86 +489,250 @@ app_ui = ui.page_fluid(
|
|
324 |
color: #0a58ca;
|
325 |
text-decoration: underline;
|
326 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
""")
|
328 |
),
|
329 |
-
ui.
|
330 |
-
|
331 |
-
ui.
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
ui.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
)
|
363 |
)
|
364 |
)
|
365 |
|
366 |
def server(input, output, session):
|
367 |
assistant = PDBSearchAssistant()
|
368 |
-
results_store = reactive.Value([])
|
|
|
369 |
|
370 |
@reactive.Effect
|
371 |
@reactive.event(input.search)
|
372 |
def _():
|
373 |
-
|
374 |
-
|
375 |
|
376 |
-
#
|
377 |
-
|
378 |
-
|
379 |
-
df['PDB ID'] = df['PDB ID'].apply(
|
380 |
-
lambda x: f'<a href="https://www.rcsb.org/3d-view/{x}" target="_blank">{x}</a>'
|
381 |
-
)
|
382 |
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
387 |
|
388 |
@output
|
389 |
@render.text
|
390 |
-
def
|
391 |
-
|
392 |
-
return f"""
|
393 |
-
Applied Search Conditions:
|
394 |
-
- Query: {input.query()}
|
395 |
-
- Total structures found: {len(results)}
|
396 |
-
"""
|
397 |
|
398 |
@output
|
399 |
@render.download(filename="pdb_search_results.csv")
|
400 |
def download():
|
401 |
-
|
|
|
|
|
|
|
|
|
402 |
return df.to_csv(index=False)
|
403 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
404 |
app = App(app_ui, server)
|
405 |
|
406 |
if __name__ == "__main__":
|
407 |
import nest_asyncio
|
408 |
nest_asyncio.apply()
|
409 |
-
app.run(host="0.0.0.0", port=
|
|
|
10 |
from UniprotKB_P_Sequence_RCSB_API_test import ProteinQuery, ProteinSearchEngine
|
11 |
import plotly.graph_objects as go
|
12 |
from shinywidgets import output_widget, render_widget
|
13 |
+
import requests
|
14 |
+
import asyncio
|
15 |
warnings.filterwarnings('ignore')
|
16 |
|
17 |
# Load environment variables from .env file
|
|
|
33 |
)
|
34 |
|
35 |
self.prompt_template = """
|
36 |
+
Extract specific search parameters from the protein-related query:
|
37 |
+
1. Protein name or type
|
38 |
+
2. Resolution cutoff (in ร
)
|
39 |
+
3. Sequence information
|
40 |
+
4. Specific PDB ID
|
41 |
+
5. Experimental method (X-RAY, EM, NMR)
|
42 |
+
6. Organism/Species information
|
43 |
|
44 |
Format:
|
45 |
+
Protein: [protein name or type]
|
46 |
+
Organism: [organism/species if mentioned]
|
47 |
Resolution: [maximum resolution in ร
, if mentioned]
|
48 |
Sequence: [any sequence mentioned]
|
49 |
PDB_ID: [specific PDB ID if mentioned]
|
50 |
Method: [experimental method if mentioned]
|
51 |
|
52 |
Examples:
|
53 |
+
Query: "Find human insulin structures with X-ray better than 2.5ร
resolution"
|
54 |
+
Protein: insulin
|
55 |
+
Organism: human
|
56 |
Resolution: 2.5
|
57 |
Sequence: none
|
58 |
PDB_ID: none
|
59 |
Method: X-RAY
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
Now analyze:
|
62 |
Query: {query}
|
63 |
"""
|
|
|
232 |
session = final_query.exec()
|
233 |
results = []
|
234 |
|
235 |
+
# Process results with additional information
|
236 |
+
search_engine = ProteinSearchEngine()
|
237 |
+
|
238 |
try:
|
239 |
for entry in session:
|
240 |
+
try:
|
241 |
+
# PDB ID ์ถ์ถ ๋ฐฉ์ ๊ฐ์
|
242 |
+
if isinstance(entry, dict):
|
243 |
+
pdb_id = entry.get('identifier')
|
244 |
+
elif hasattr(entry, 'identifier'):
|
245 |
+
pdb_id = entry.identifier
|
246 |
+
else:
|
247 |
+
pdb_id = str(entry)
|
248 |
+
|
249 |
+
pdb_id = pdb_id.upper() # PDB ID๋ ํญ์ ๋๋ฌธ์
|
250 |
+
|
251 |
+
if not pdb_id or len(pdb_id) != 4: # PDB ID๋ ํญ์ 4์๋ฆฌ
|
252 |
+
continue
|
253 |
+
|
254 |
+
# RCSB PDB REST API๋ฅผ ์ง์ ์ฌ์ฉํ์ฌ ๊ตฌ์กฐ ์ ๋ณด ๊ฐ์ ธ์ค๊ธฐ
|
255 |
+
structure_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
|
256 |
+
response = requests.get(structure_url)
|
257 |
+
|
258 |
+
if response.status_code != 200:
|
259 |
+
continue
|
260 |
+
|
261 |
+
structure_data = response.json()
|
262 |
+
|
263 |
+
# ๊ฒฐ๊ณผ ๊ตฌ์ฑ
|
264 |
result = {
|
265 |
+
'PDB ID': pdb_id,
|
266 |
+
'Resolution': f"{structure_data.get('rcsb_entry_info', {}).get('resolution_combined', [0.0])[0]:.2f}ร
",
|
267 |
+
'Method': structure_data.get('exptl', [{}])[0].get('method', 'Unknown'),
|
268 |
+
'Title': structure_data.get('struct', {}).get('title', 'N/A'),
|
269 |
+
'Release Date': structure_data.get('rcsb_accession_info', {}).get('initial_release_date', 'N/A')
|
270 |
}
|
271 |
+
|
272 |
+
results.append(result)
|
273 |
+
|
274 |
+
# Limit to top 10 results
|
275 |
+
if len(results) >= 10:
|
276 |
+
break
|
277 |
+
|
278 |
+
except Exception as e:
|
279 |
+
print(f"Error processing entry: {str(e)}")
|
280 |
+
continue
|
281 |
+
|
282 |
except Exception as e:
|
283 |
print(f"Error processing results: {str(e)}")
|
284 |
+
print(f"Error type: {type(e)}")
|
285 |
+
|
|
|
|
|
286 |
print(f"Found {len(results)} structures")
|
287 |
return results
|
288 |
+
|
289 |
return []
|
290 |
|
291 |
except Exception as e:
|
|
|
293 |
print(f"Error type: {type(e)}")
|
294 |
return []
|
295 |
|
296 |
+
def get_sequences_by_pdb_id(self, pdb_id):
    """Return sequence records for the structure identified by *pdb_id*.

    The lookup first goes through ``ProteinSearchEngine``; the first search
    hit whose ``pdb_id`` equals the requested one (case-insensitively) is
    converted into a single record. Whenever that path yields nothing --
    the search returns no hits, none of the hits has the requested ID, or
    the search raises -- the method falls back to
    ``self._get_sequences_by_direct_api``.

    Args:
        pdb_id: PDB entry identifier, e.g. ``"8ET6"``.

    Returns:
        A list of dicts. Records built from the search engine carry the
        keys ``chain_id``, ``entity_id``, ``description``, ``sequence``,
        ``length``, ``resolution``, ``method`` and ``release_date``;
        otherwise whatever the direct-API fallback returns.
    """
    try:
        search_engine = ProteinSearchEngine()

        # A very permissive resolution cutoff, presumably so that resolution
        # never filters the requested entry out -- confirm against
        # ProteinQuery's semantics.
        query = ProteinQuery(
            name=pdb_id,
            max_resolution=100.0,
        )

        wanted_id = pdb_id.upper()
        for structure in search_engine.search(query) or []:
            if structure.pdb_id.upper() != wanted_id:
                continue

            # The search hit exposes one sequence for the whole entry, so it
            # is reported as a single combined pseudo-chain.
            return [{
                'chain_id': 'ALL',
                'entity_id': '1',
                'description': structure.title,
                'sequence': structure.sequence,
                'length': len(structure.sequence),
                'resolution': structure.resolution,
                'method': structure.method,
                'release_date': structure.release_date,
            }]

    except Exception as e:
        # Best-effort: any failure of the search engine path is reported and
        # answered through the direct API instead.
        print(f"Error in ProteinSearchEngine search for PDB ID {pdb_id}: {str(e)}")
        return self._get_sequences_by_direct_api(pdb_id)

    # Reached both when the search returned nothing and when no hit carried
    # the requested ID; either way the direct API gets a chance.
    print("No results found using ProteinSearchEngine, trying direct API call...")
    return self._get_sequences_by_direct_api(pdb_id)
|
342 |
|
343 |
+
def _get_sequences_by_direct_api(self, pdb_id, timeout=10):
    """Fetch per-chain sequences for *pdb_id* straight from the RCSB Data API.

    The entry record is requested first to learn which polymer entities the
    structure contains; each polymer entity is then requested once and
    expanded into one record per chain that carries it.

    Args:
        pdb_id: PDB entry identifier, e.g. ``"8ET6"``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts with the keys ``chain_id``, ``entity_id``,
        ``description``, ``sequence`` and ``length``. The list is empty when
        the entry cannot be fetched or any error occurs.
    """
    base_url = "https://data.rcsb.org/rest/v1/core"

    try:
        # Without a timeout a stalled connection would block the caller
        # indefinitely.
        entry_response = requests.get(f"{base_url}/entry/{pdb_id}", timeout=timeout)
        if entry_response.status_code != 200:
            return []

        entity_ids = (
            entry_response.json()
            .get('rcsb_entry_container_identifiers', {})
            .get('polymer_entity_ids')
        ) or []

        sequences = []
        for entity_id in entity_ids:
            entity_response = requests.get(
                f"{base_url}/polymer_entity/{pdb_id}/{entity_id}",
                timeout=timeout,
            )
            if entity_response.status_code != 200:
                # Skip this entity but keep whatever else can be retrieved.
                continue

            entity_data = entity_response.json()
            # `or ''` also covers an explicit JSON null for the sequence.
            sequence = entity_data.get('entity_poly', {}).get('pdbx_seq_one_letter_code') or ''
            description = entity_data.get('rcsb_polymer_entity', {}).get('pdbx_description', 'N/A')

            identifiers = entity_data.get('rcsb_polymer_entity_container_identifiers', {})
            # Prefer author chain names; fall back to the internal asym IDs.
            chain_ids = identifiers.get('auth_asym_ids') or identifiers.get('asym_ids') or ['N/A']

            for chain_id in chain_ids:
                sequences.append({
                    'chain_id': chain_id,
                    'entity_id': entity_id,
                    'description': description,
                    'sequence': sequence,
                    'length': len(sequence),
                })

        return sequences

    except Exception as e:
        # Best-effort fallback: report the problem and return no sequences.
        print(f"Error in direct API call for PDB ID {pdb_id}: {str(e)}")
        return []
|
382 |
|
383 |
+
def analyze_query_type(self, query):
    """Classify *query* as a sequence lookup or a structure search.

    The query is lower-cased and stripped, then tested against a handful of
    phrasings such as "sequence of pdb id 8ET6" or "get sequence 7BZ5". The
    ID must be a stand-alone four-character token that starts with a digit,
    so ordinary words next to "sequence" (for example "containing sequence
    MNIF...") are not mistaken for a PDB ID.

    Args:
        query: Free-text query entered by the user.

    Returns:
        ``{"type": "sequence", "pdb_id": <upper-case ID>}`` when a phrasing
        matches, otherwise ``{"type": "structure", "query": <lower-cased,
        stripped query>}``.
    """
    print(f"\nAnalyzing query: '{query}'")
    query = query.lower().strip()
    print(f"Lowercase query: '{query}'")

    # Classic PDB IDs are four characters long and begin with a digit. The
    # word boundaries keep the group from matching a slice of a longer word.
    # Only lower-case letters are needed because the query was lower-cased.
    pdb_id_group = r"\b([0-9][a-z0-9]{3})\b"
    sequence_patterns = [
        r"sequence\s+of\s+pdb\s+id\s+" + pdb_id_group,
        r"sequence\s+for\s+pdb\s+id\s+" + pdb_id_group,
        r"get\s+sequence\s+" + pdb_id_group,
        pdb_id_group + r"\s+sequence",
    ]

    for number, pattern in enumerate(sequence_patterns, start=1):
        print(f"Trying pattern {number}: {pattern}")
        match = re.search(pattern, query)
        if match:
            pdb_id = match.group(1).upper()
            print(f"Match found! PDB ID: {pdb_id}")
            return {
                "type": "sequence",
                "pdb_id": pdb_id,
            }

    print("No sequence pattern matched, treating as structure search")
    return {
        "type": "structure",
        "query": query,
    }
|
413 |
|
414 |
+
def process_query(self, query):
    """Route *query* to the sequence lookup or the structure search.

    The query is classified with ``analyze_query_type``. A "sequence"
    classification is answered by ``get_sequences_by_pdb_id``; anything else
    is answered by ``search_pdb`` and labelled "structure".

    Args:
        query: Free-text query entered by the user.

    Returns:
        A dict ``{"type": "sequence" | "structure", "results": [...]}``.
    """
    query_info = self.analyze_query_type(query)

    if query_info["type"] == "sequence":
        kind = "sequence"
        hits = self.get_sequences_by_pdb_id(query_info["pdb_id"])
    else:
        kind = "structure"
        hits = self.search_pdb(query_info["query"])

    return {"type": kind, "results": hits}
|
428 |
|
|
|
429 |
|
430 |
def create_interactive_table(df):
|
431 |
if df.empty:
|
432 |
return go.Figure()
|
433 |
|
434 |
+
# Reorder columns
|
435 |
+
column_order = ['PDB ID', 'Resolution', 'Method', 'Title', 'Release Date']
|
436 |
+
df = df[column_order]
|
437 |
+
|
438 |
+
# Release Date ํ์ ๋ณ๊ฒฝ (YYYY-MM-DD)
|
439 |
+
df['Release Date'] = pd.to_datetime(df['Release Date']).dt.strftime('%Y-%m-%d')
|
440 |
+
|
441 |
# Create interactive table
|
442 |
table = go.Figure(data=[go.Table(
|
443 |
header=dict(
|
444 |
values=list(df.columns),
|
445 |
fill_color='paleturquoise',
|
446 |
+
align='center', # ํค๋ ์ค์ ์ ๋ ฌ
|
447 |
+
font=dict(size=16), # ํค๋ ๊ธ์ ํฌ๊ธฐ ์ฆ๊ฐ
|
448 |
),
|
449 |
cells=dict(
|
450 |
+
values=[
|
451 |
+
[f'<a href="https://www.rcsb.org/structure/{cell}">{cell}</a>'
|
452 |
+
if i == 0 else cell
|
453 |
+
for cell in df[col]]
|
454 |
+
for i, col in enumerate(df.columns)
|
455 |
+
],
|
456 |
+
align='center', # ์
๋ด์ฉ ์ค์ ์ ๋ ฌ
|
457 |
+
font=dict(size=15), # ์
๊ธ์ ํฌ๊ธฐ ์ฆ๊ฐ
|
458 |
+
height=35 # ์
๋์ด ์ฆ๊ฐ
|
459 |
),
|
460 |
+
columnwidth=[80, 80, 100, 400, 100],
|
461 |
+
customdata=[['html'] * len(df) if i == 0 else [''] * len(df)
|
462 |
+
for i in range(len(df.columns))],
|
463 |
+
hoverlabel=dict(bgcolor='white')
|
464 |
)])
|
465 |
|
466 |
# Update table layout
|
467 |
table.update_layout(
|
468 |
+
margin=dict(l=20, r=20, t=20, b=20),
|
469 |
+
height=450, # ํ
์ด๋ธ ์ ์ฒด ๋์ด ์ฆ๊ฐ
|
470 |
autosize=True
|
471 |
)
|
472 |
|
|
|
476 |
app_ui = ui.page_fluid(
|
477 |
ui.tags.head(
|
478 |
ui.tags.style("""
|
479 |
+
.container-fluid {
|
480 |
+
max-width: 1200px;
|
481 |
+
margin: 0 auto;
|
482 |
+
padding: 20px;
|
483 |
+
}
|
484 |
.table a {
|
485 |
color: #0d6efd;
|
486 |
text-decoration: none;
|
|
|
489 |
color: #0a58ca;
|
490 |
text-decoration: underline;
|
491 |
}
|
492 |
+
.shiny-input-container {
|
493 |
+
max-width: 100%;
|
494 |
+
margin: 0 auto;
|
495 |
+
}
|
496 |
+
#query {
|
497 |
+
height: 100px;
|
498 |
+
font-size: 16px;
|
499 |
+
padding: 15px;
|
500 |
+
width: 80%;
|
501 |
+
margin: 0 auto;
|
502 |
+
display: block;
|
503 |
+
}
|
504 |
+
.content-wrapper {
|
505 |
+
text-align: center;
|
506 |
+
max-width: 1000px;
|
507 |
+
margin: 0 auto;
|
508 |
+
}
|
509 |
+
.search-button {
|
510 |
+
margin: 20px 0;
|
511 |
+
}
|
512 |
+
h2, h4 {
|
513 |
+
text-align: center;
|
514 |
+
margin: 20px 0;
|
515 |
+
}
|
516 |
+
.example-box {
|
517 |
+
background-color: #f8f9fa;
|
518 |
+
border-radius: 8px;
|
519 |
+
padding: 20px;
|
520 |
+
margin: 20px auto;
|
521 |
+
width: 80%;
|
522 |
+
text-align: left;
|
523 |
+
}
|
524 |
+
.example-box p {
|
525 |
+
font-weight: bold;
|
526 |
+
margin-bottom: 10px;
|
527 |
+
padding-left: 20px;
|
528 |
+
}
|
529 |
+
.example-box ul {
|
530 |
+
margin: 0;
|
531 |
+
padding-left: 40px;
|
532 |
+
}
|
533 |
+
.example-box li {
|
534 |
+
word-wrap: break-word;
|
535 |
+
margin: 10px 0;
|
536 |
+
line-height: 1.5;
|
537 |
+
}
|
538 |
+
.query-label {
|
539 |
+
display: block;
|
540 |
+
text-align: left;
|
541 |
+
margin-bottom: 10px;
|
542 |
+
margin-left: 10%;
|
543 |
+
font-weight: bold;
|
544 |
+
}
|
545 |
+
.status-box {
|
546 |
+
background-color: #f8f9fa;
|
547 |
+
border-radius: 8px;
|
548 |
+
padding: 15px;
|
549 |
+
margin: 20px auto;
|
550 |
+
width: 80%;
|
551 |
+
text-align: left;
|
552 |
+
}
|
553 |
+
.status-label {
|
554 |
+
font-weight: bold;
|
555 |
+
margin-right: 10px;
|
556 |
+
}
|
557 |
+
.status-ready {
|
558 |
+
color: #198754; /* Bootstrap success color */
|
559 |
+
font-weight: bold;
|
560 |
+
}
|
561 |
+
.sequence-results {
|
562 |
+
width: 80%;
|
563 |
+
margin: 20px auto;
|
564 |
+
text-align: left;
|
565 |
+
font-family: monospace;
|
566 |
+
white-space: pre-wrap;
|
567 |
+
word-wrap: break-word;
|
568 |
+
background-color: #f8f9fa;
|
569 |
+
border-radius: 8px;
|
570 |
+
padding: 20px;
|
571 |
+
overflow-x: hidden;
|
572 |
+
}
|
573 |
+
.sequence-text {
|
574 |
+
word-break: break-all;
|
575 |
+
margin: 10px 0;
|
576 |
+
line-height: 1.5;
|
577 |
+
}
|
578 |
+
.status-spinner {
|
579 |
+
display: none;
|
580 |
+
margin-left: 10px;
|
581 |
+
vertical-align: middle;
|
582 |
+
}
|
583 |
+
.status-spinner.active {
|
584 |
+
display: inline-block;
|
585 |
+
}
|
586 |
""")
|
587 |
),
|
588 |
+
ui.div(
|
589 |
+
{"class": "content-wrapper"},
|
590 |
+
ui.h2("Advanced PDB Structure Search Tool"),
|
591 |
+
ui.row(
|
592 |
+
ui.column(12,
|
593 |
+
ui.tags.label(
|
594 |
+
"Search Query",
|
595 |
+
{"class": "query-label", "for": "query"}
|
596 |
+
),
|
597 |
+
ui.input_text(
|
598 |
+
"query",
|
599 |
+
"",
|
600 |
+
value="Human insulin",
|
601 |
+
width="100%"
|
602 |
+
),
|
603 |
+
)
|
604 |
+
),
|
605 |
+
ui.row(
|
606 |
+
ui.column(12,
|
607 |
+
ui.div(
|
608 |
+
{"class": "example-box"},
|
609 |
+
ui.p("Example queries:"),
|
610 |
+
ui.tags.ul(
|
611 |
+
ui.tags.li("Human hemoglobin C resolution better than 2.5ร
"),
|
612 |
+
ui.tags.li("Find structures containing sequence MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL"),
|
613 |
+
ui.tags.li("Sequence of PDB ID 8ET6"),
|
614 |
+
ui.tags.li("Get sequence 7BZ5")
|
615 |
+
)
|
616 |
+
)
|
617 |
+
)
|
618 |
+
),
|
619 |
+
ui.row(
|
620 |
+
ui.column(12,
|
621 |
+
ui.div(
|
622 |
+
{"class": "search-button"},
|
623 |
+
ui.input_action_button("search", "Search",
|
624 |
+
class_="btn-primary btn-lg") # ๋ฒํผ ํฌ๊ธฐ ์ฆ๊ฐ
|
625 |
+
)
|
626 |
+
)
|
627 |
+
),
|
628 |
+
ui.row(
|
629 |
+
ui.column(12,
|
630 |
+
ui.h4("Search Parameters:"),
|
631 |
+
ui.div(
|
632 |
+
{"class": "status-box"},
|
633 |
+
ui.tags.span("Status: ", class_="status-label"),
|
634 |
+
ui.output_text("search_status", inline=True),
|
635 |
+
ui.tags.div(
|
636 |
+
{"class": "status-spinner"},
|
637 |
+
ui.tags.i({"class": "fas fa-spinner fa-spin"})
|
638 |
+
)
|
639 |
+
)
|
640 |
+
)
|
641 |
+
),
|
642 |
+
ui.row(
|
643 |
+
ui.column(12,
|
644 |
+
ui.h4("Top 10 Results:"),
|
645 |
+
output_widget("results_table"),
|
646 |
+
ui.download_button("download", "Download Results",
|
647 |
+
class_="btn btn-info btn-lg") # ๋ค์ด๋ก๋ ๋ฒํผ ์คํ์ผ ๊ฐ์
|
648 |
+
)
|
649 |
+
),
|
650 |
+
ui.row(
|
651 |
+
ui.column(12,
|
652 |
+
ui.div(
|
653 |
+
{"class": "sequence-results", "id": "sequence-results"},
|
654 |
+
ui.h4("Sequences:"),
|
655 |
+
ui.output_text("sequence_output")
|
656 |
+
)
|
657 |
+
)
|
658 |
)
|
659 |
)
|
660 |
)
|
661 |
|
662 |
def server(input, output, session):
    """Server logic for the PDB search app (the callback handed to ``App``).

    One ``PDBSearchAssistant`` is created per session. Clicking the "search"
    button runs the query and stores the outcome in ``results_store``; the
    table, the sequence text and the CSV download are all derived from that
    stored value, and ``status_store`` feeds the status line.

    Args:
        input: Shiny input accessor; ``input.query`` and ``input.search``
            are read here.
        output: Shiny output registry used by the ``@output`` decorators.
        session: Shiny session object (not used directly).
    """
    assistant = PDBSearchAssistant()
    results_store = reactive.Value({"type": None, "results": []})
    status_store = reactive.Value("Ready")

    @reactive.Effect
    @reactive.event(input.search)
    def _():
        # NOTE(review): the search below blocks inside this same effect, so
        # "Searching..." is most likely never rendered before it is
        # overwritten -- confirm, and consider an async/extended task.
        status_store.set("Searching...")

        try:
            query_results = assistant.process_query(input.query())
        except Exception as e:
            # Keep the session usable instead of letting the effect fail.
            print(f"Error processing query: {str(e)}")
            results_store.set({"type": None, "results": []})
            status_store.set("Search failed")
            return

        results_store.set(query_results)

        if query_results["type"] == "sequence" and not query_results["results"]:
            status_store.set("No sequences found")
        else:
            status_store.set("Ready")

    # Registered once at server scope and driven by results_store, rather
    # than being re-registered from inside the click handler.
    @output
    @render_widget
    def results_table():
        current_results = results_store.get()
        if current_results["type"] != "structure":
            # Nothing tabular to show (no search yet, or a sequence query).
            return go.Figure()
        return create_interactive_table(pd.DataFrame(current_results["results"]))

    @output
    @render.text
    def search_status():
        return status_store.get()

    @output
    @render.download(filename="pdb_search_results.csv")
    def download():
        current_results = results_store.get()
        # A download handler must *yield* its content; a returned string is
        # interpreted as the path of a file to send.
        yield pd.DataFrame(current_results["results"]).to_csv(index=False)

    @output
    @render.text
    def sequence_output():
        current_results = results_store.get()
        if current_results["type"] != "sequence":
            return ""

        sequences = current_results["results"]
        if not sequences:
            return "No sequences found"

        output_text = []
        for seq in sequences:
            output_text.append(f"\nChain {seq['chain_id']} (Entity {seq['entity_id']}):")
            output_text.append(f"Description: {seq['description']}")
            output_text.append(f"Length: {seq['length']} residues")
            output_text.append("Sequence:")

            # Wrap the sequence at 60 characters per line.
            sequence = seq['sequence']
            formatted_sequence = '\n'.join(
                sequence[i:i + 60] for i in range(0, len(sequence), 60)
            )
            output_text.append(formatted_sequence)
            output_text.append("-" * 60)

        return "\n".join(output_text)
|
732 |
+
|
733 |
# Application object combining the page layout with the server callback.
app = App(app_ui, server)

if __name__ == "__main__":
    # Only needed when this module is executed directly as a script.
    import nest_asyncio

    listen_host = "0.0.0.0"
    listen_port = 7862

    # Patch asyncio before the server is started from this script.
    nest_asyncio.apply()
    app.run(host=listen_host, port=listen_port)
|