lkjjj26 committed on
Commit
e0471a2
·
1 Parent(s): 7a496d9

update app.py

Browse files
Files changed (1) hide show
  1. app.py +437 -108
app.py CHANGED
@@ -10,6 +10,8 @@ import re
10
  from UniprotKB_P_Sequence_RCSB_API_test import ProteinQuery, ProteinSearchEngine
11
  import plotly.graph_objects as go
12
  from shinywidgets import output_widget, render_widget
 
 
13
  warnings.filterwarnings('ignore')
14
 
15
  # Load environment variables from .env file
@@ -31,31 +33,31 @@ class PDBSearchAssistant:
31
  )
32
 
33
  self.prompt_template = """
34
- Extract specific search parameters from the query, if present:
35
- 1. Resolution cutoff (in Å)
36
- 2. Sequence information
37
- 3. Specific PDB ID
38
- 4. Experimental method (X-RAY, EM, NMR)
 
 
39
 
40
  Format:
 
 
41
  Resolution: [maximum resolution in Å, if mentioned]
42
  Sequence: [any sequence mentioned]
43
  PDB_ID: [specific PDB ID if mentioned]
44
  Method: [experimental method if mentioned]
45
 
46
  Examples:
47
- Query: "Find X-ray structures better than 2.5Å resolution"
 
 
48
  Resolution: 2.5
49
  Sequence: none
50
  PDB_ID: none
51
  Method: X-RAY
52
 
53
- Query: "Show me NMR structures of kinases"
54
- Resolution: none
55
- Sequence: none
56
- PDB_ID: none
57
- Method: NMR
58
-
59
  Now analyze:
60
  Query: {query}
61
  """
@@ -230,30 +232,60 @@ class PDBSearchAssistant:
230
  session = final_query.exec()
231
  results = []
232
 
233
- # Process results safely with additional information
 
 
234
  try:
235
  for entry in session:
236
- # Handle both string and object types
237
- if isinstance(entry, str):
238
- result = {
239
- 'PDB ID': entry
240
- }
241
- else:
242
- # Handle object type
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  result = {
244
- 'PDB ID': entry.identifier
 
 
 
 
245
  }
246
-
247
- results.append(result)
 
 
 
 
 
 
 
 
 
248
  except Exception as e:
249
  print(f"Error processing results: {str(e)}")
250
- # If error occurs during processing, at least return PDB IDs
251
- if isinstance(entry, str):
252
- results.append({'PDB ID': entry})
253
-
254
  print(f"Found {len(results)} structures")
255
  return results
256
-
257
  return []
258
 
259
  except Exception as e:
@@ -261,52 +293,180 @@ class PDBSearchAssistant:
261
  print(f"Error type: {type(e)}")
262
  return []
263
 
264
- def pdbsummary(name):
265
-
266
- search_engine = ProteinSearchEngine()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
- query = ProteinQuery(
269
- name,
270
- max_resolution= 5.0
271
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
- results = search_engine.search(query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
- answer = ""
276
- for i, structure in enumerate(results, 1):
277
- answer += f"\n{i}. PDB ID : {structure.pdb_id}\n"
278
- answer += f"\nResolution : {structure.resolution:.2f} A \n"
279
- answer += f"Method : {structure.method}\n Title : {structure.title}\n"
280
- answer += f"Release Date : {structure.release_date}\n Sequence length: {len(structure.sequence)} aa\n"
281
- answer += f" Sequence:\n {structure.sequence}\n"
 
 
 
 
 
 
 
282
 
283
- return answer
284
 
285
  def create_interactive_table(df):
286
  if df.empty:
287
  return go.Figure()
288
 
 
 
 
 
 
 
 
289
  # Create interactive table
290
  table = go.Figure(data=[go.Table(
291
  header=dict(
292
  values=list(df.columns),
293
  fill_color='paleturquoise',
294
- align='left',
295
- font=dict(size=14),
296
  ),
297
  cells=dict(
298
- values=[df[col] for col in df.columns],
299
- align='left',
300
- font=dict(size=13),
301
- height=30
 
 
 
 
 
302
  ),
303
- columnwidth=[len(str(max(df[col], key=len))) for col in df.columns]
 
 
 
304
  )])
305
 
306
  # Update table layout
307
  table.update_layout(
308
- margin=dict(l=0, r=0, t=0, b=0),
309
- height=400,
310
  autosize=True
311
  )
312
 
@@ -316,6 +476,11 @@ def create_interactive_table(df):
316
  app_ui = ui.page_fluid(
317
  ui.tags.head(
318
  ui.tags.style("""
 
 
 
 
 
319
  .table a {
320
  color: #0d6efd;
321
  text-decoration: none;
@@ -324,86 +489,250 @@ app_ui = ui.page_fluid(
324
  color: #0a58ca;
325
  text-decoration: underline;
326
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  """)
328
  ),
329
- ui.h2("Advanced PDB Structure Search Tool"),
330
- ui.row(
331
- ui.column(12,
332
- ui.input_text("query", "Search Query",
333
- value="Human insulin"),
334
- )
335
- ),
336
- ui.row(
337
- ui.column(12,
338
- ui.p("Example queries:"),
339
- ui.tags.ul(
340
- ui.tags.li("Human hemoglobin C resolution better than 2.5Å"),
341
- ui.tags.li("Find structures containing sequence MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL"),
342
-
343
- ),
344
- )
345
- ),
346
- ui.row(
347
- ui.column(12,
348
- ui.input_action_button("search", "Search", class_="btn-primary"),
349
- )
350
- ),
351
- ui.row(
352
- ui.column(12,
353
- ui.h4("Search Parameters:"),
354
- ui.output_text("search_conditions"),
355
- )
356
- ),
357
- ui.row(
358
- ui.column(12,
359
- ui.h4("Top 10 Results:"),
360
- output_widget("results_table"),
361
- ui.download_button("download", "Download Results")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  )
363
  )
364
  )
365
 
366
  def server(input, output, session):
367
  assistant = PDBSearchAssistant()
368
- results_store = reactive.Value([])
 
369
 
370
  @reactive.Effect
371
  @reactive.event(input.search)
372
  def _():
373
- results = assistant.search_pdb(query=input.query())
374
- results_store.set(results)
375
 
376
- # Convert results to DataFrame and add hyperlinks
377
- df = pd.DataFrame(results)
378
- if not df.empty:
379
- df['PDB ID'] = df['PDB ID'].apply(
380
- lambda x: f'<a href="https://www.rcsb.org/3d-view/{x}" target="_blank">{x}</a>'
381
- )
382
 
383
- @output
384
- @render_widget
385
- def results_table():
386
- return create_interactive_table(df) # id ์ˆœ์œผ๋กœ ์ •๋ ฌ๋˜๋Š”๊ฑฐ์ธ๋“ฏ Top rank ์ˆœ์€ ์•„๋‹˜
 
 
 
 
 
 
 
 
 
387
 
388
  @output
389
  @render.text
390
- def search_conditions():
391
- results = results_store.get()
392
- return f"""
393
- Applied Search Conditions:
394
- - Query: {input.query()}
395
- - Total structures found: {len(results)}
396
- """
397
 
398
  @output
399
  @render.download(filename="pdb_search_results.csv")
400
  def download():
401
- df = pd.DataFrame(results_store.get())
 
 
 
 
402
  return df.to_csv(index=False)
403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
  app = App(app_ui, server)
405
 
406
  if __name__ == "__main__":
407
  import nest_asyncio
408
  nest_asyncio.apply()
409
- app.run(host="0.0.0.0", port=7860)
 
10
  from UniprotKB_P_Sequence_RCSB_API_test import ProteinQuery, ProteinSearchEngine
11
  import plotly.graph_objects as go
12
  from shinywidgets import output_widget, render_widget
13
+ import requests
14
+ import asyncio
15
  warnings.filterwarnings('ignore')
16
 
17
  # Load environment variables from .env file
 
33
  )
34
 
35
  self.prompt_template = """
36
+ Extract specific search parameters from the protein-related query:
37
+ 1. Protein name or type
38
+ 2. Resolution cutoff (in Å)
39
+ 3. Sequence information
40
+ 4. Specific PDB ID
41
+ 5. Experimental method (X-RAY, EM, NMR)
42
+ 6. Organism/Species information
43
 
44
  Format:
45
+ Protein: [protein name or type]
46
+ Organism: [organism/species if mentioned]
47
  Resolution: [maximum resolution in Å, if mentioned]
48
  Sequence: [any sequence mentioned]
49
  PDB_ID: [specific PDB ID if mentioned]
50
  Method: [experimental method if mentioned]
51
 
52
  Examples:
53
+ Query: "Find human insulin structures with X-ray better than 2.5Å resolution"
54
+ Protein: insulin
55
+ Organism: human
56
  Resolution: 2.5
57
  Sequence: none
58
  PDB_ID: none
59
  Method: X-RAY
60
 
 
 
 
 
 
 
61
  Now analyze:
62
  Query: {query}
63
  """
 
232
  session = final_query.exec()
233
  results = []
234
 
235
+ # Process results with additional information
236
+ search_engine = ProteinSearchEngine()
237
+
238
  try:
239
  for entry in session:
240
+ try:
241
+ # PDB ID ์ถ”์ถœ ๋ฐฉ์‹ ๊ฐœ์„ 
242
+ if isinstance(entry, dict):
243
+ pdb_id = entry.get('identifier')
244
+ elif hasattr(entry, 'identifier'):
245
+ pdb_id = entry.identifier
246
+ else:
247
+ pdb_id = str(entry)
248
+
249
+ pdb_id = pdb_id.upper() # PDB ID๋Š” ํ•ญ์ƒ ๋Œ€๋ฌธ์ž
250
+
251
+ if not pdb_id or len(pdb_id) != 4: # PDB ID๋Š” ํ•ญ์ƒ 4์ž๋ฆฌ
252
+ continue
253
+
254
+ # RCSB PDB REST API๋ฅผ ์ง์ ‘ ์‚ฌ์šฉํ•˜์—ฌ ๊ตฌ์กฐ ์ •๋ณด ๊ฐ€์ ธ์˜ค๊ธฐ
255
+ structure_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
256
+ response = requests.get(structure_url)
257
+
258
+ if response.status_code != 200:
259
+ continue
260
+
261
+ structure_data = response.json()
262
+
263
+ # ๊ฒฐ๊ณผ ๊ตฌ์„ฑ
264
  result = {
265
+ 'PDB ID': pdb_id,
266
+ 'Resolution': f"{structure_data.get('rcsb_entry_info', {}).get('resolution_combined', [0.0])[0]:.2f}Å",
267
+ 'Method': structure_data.get('exptl', [{}])[0].get('method', 'Unknown'),
268
+ 'Title': structure_data.get('struct', {}).get('title', 'N/A'),
269
+ 'Release Date': structure_data.get('rcsb_accession_info', {}).get('initial_release_date', 'N/A')
270
  }
271
+
272
+ results.append(result)
273
+
274
+ # Limit to top 10 results
275
+ if len(results) >= 10:
276
+ break
277
+
278
+ except Exception as e:
279
+ print(f"Error processing entry: {str(e)}")
280
+ continue
281
+
282
  except Exception as e:
283
  print(f"Error processing results: {str(e)}")
284
+ print(f"Error type: {type(e)}")
285
+
 
 
286
  print(f"Found {len(results)} structures")
287
  return results
288
+
289
  return []
290
 
291
  except Exception as e:
 
293
  print(f"Error type: {type(e)}")
294
  return []
295
 
296
def get_sequences_by_pdb_id(self, pdb_id):
    """Return sequence records for the structure identified by ``pdb_id``.

    A ``ProteinSearchEngine`` search is run with ``pdb_id`` as the query
    name. The first hit whose ``pdb_id`` equals the requested one
    (compared case-insensitively) is converted into a single record.

    Returns:
        * ``[]`` when the search produces no hits at all;
        * a one-element list holding a dict with the keys ``chain_id``,
          ``entity_id``, ``description``, ``sequence``, ``length``,
          ``resolution``, ``method`` and ``release_date`` when a hit matches;
        * whatever ``self._get_sequences_by_direct_api(pdb_id)`` returns when
          there are hits but none matches, or when any exception is raised.
    """
    try:
        engine = ProteinSearchEngine()

        # The resolution ceiling is set very high so that no structure is
        # filtered out on resolution grounds.
        hits = engine.search(ProteinQuery(name=pdb_id, max_resolution=100.0))

        if not hits:
            return []

        # Only the first exact (case-insensitive) PDB ID match is used.
        matched = next(
            (hit for hit in hits if hit.pdb_id.upper() == pdb_id.upper()),
            None,
        )

        if matched is None:
            # The search returned hits, but none for this ID: try the REST API.
            print(f"No results found using ProteinSearchEngine, trying direct API call...")
            return self._get_sequences_by_direct_api(pdb_id)

        record = {
            'chain_id': 'ALL',  # chains are not distinguished on this path
            'entity_id': '1',
            'description': matched.title,
            'sequence': matched.sequence,
            'length': len(matched.sequence),
            'resolution': matched.resolution,
            'method': matched.method,
            'release_date': matched.release_date
        }
        return [record]

    except Exception as e:
        print(f"Error in ProteinSearchEngine search for PDB ID {pdb_id}: {str(e)}")
        # On any failure, fall back to the direct REST API lookup.
        return self._get_sequences_by_direct_api(pdb_id)
342
 
343
def _get_sequences_by_direct_api(self, pdb_id):
    """Fetch per-chain sequences for ``pdb_id`` from the RCSB Data REST API.

    The entry record is fetched first to learn which polymer entities the
    structure contains; each entity is then fetched once and expanded into
    one record per chain that carries it.

    The previous implementation requested
    ``/core/polymer_entity_instances/{pdb_id}``, which is not a route of the
    RCSB Data API (the instance route is singular and also needs an asym
    id), so this fallback could never return data.

    NOTE(review): the JSON field names used below
    (``rcsb_entry_container_identifiers.polymer_entity_ids``,
    ``rcsb_polymer_entity_container_identifiers.auth_asym_ids`` /
    ``asym_ids``) follow the published RCSB Data API schema -- confirm
    against the live API before relying on them.

    Args:
        pdb_id: PDB entry identifier, interpolated into the request URLs.

    Returns:
        A list of dicts with the keys ``chain_id``, ``entity_id``,
        ``description``, ``sequence`` and ``length``. The list is empty when
        the entry cannot be fetched or when any error occurs.
    """
    base_url = "https://data.rcsb.org/rest/v1/core"
    # Without a timeout a stalled connection would block the app indefinitely.
    timeout_seconds = 10

    try:
        entry_response = requests.get(
            f"{base_url}/entry/{pdb_id}", timeout=timeout_seconds
        )
        if entry_response.status_code != 200:
            return []

        entity_ids = (
            entry_response.json()
            .get('rcsb_entry_container_identifiers', {})
            .get('polymer_entity_ids')
            or []
        )

        sequences = []
        for entity_id in entity_ids:
            entity_response = requests.get(
                f"{base_url}/polymer_entity/{pdb_id}/{entity_id}",
                timeout=timeout_seconds,
            )
            if entity_response.status_code != 200:
                # Skip entities that cannot be fetched; keep the others.
                continue

            entity_data = entity_response.json()
            sequence = entity_data.get('entity_poly', {}).get('pdbx_seq_one_letter_code') or ''
            description = entity_data.get('rcsb_polymer_entity', {}).get('pdbx_description', 'N/A')

            identifiers = entity_data.get('rcsb_polymer_entity_container_identifiers', {})
            # Prefer author chain names; fall back to the internal asym ids.
            chain_ids = (
                identifiers.get('auth_asym_ids')
                or identifiers.get('asym_ids')
                or ['N/A']
            )

            for chain_id in chain_ids:
                sequences.append({
                    'chain_id': chain_id,
                    'entity_id': entity_id,
                    'description': description,
                    'sequence': sequence,
                    'length': len(sequence)
                })

        return sequences

    except Exception as e:
        # Deliberately best-effort: callers expect a list, never an exception.
        print(f"Error in direct API call for PDB ID {pdb_id}: {str(e)}")
        return []
382
 
383
def analyze_query_type(self, query):
    """Classify ``query`` as a sequence lookup or a structure search.

    The query is lower-cased and stripped, then tested against a few
    phrasings that ask for the sequence of a specific PDB entry
    ("sequence of pdb id XXXX", "sequence for pdb id XXXX",
    "get sequence XXXX", "XXXX sequence").

    A PDB ID is only recognised when it is a standalone four-character
    token starting with a digit. Without the word boundaries, an ordinary
    phrase such as "containing sequence MNIF..." matched the
    "XXXX sequence" pattern on the tail of "containing" and was wrongly
    routed to a sequence lookup for the ID "NING".

    Args:
        query: Free-text user query.

    Returns:
        ``{"type": "sequence", "pdb_id": <upper-cased id>}`` when a sequence
        request is recognised, otherwise
        ``{"type": "structure", "query": <lower-cased, stripped query>}``.
    """
    print(f"\nAnalyzing query: '{query}'")
    query = query.lower().strip()
    print(f"Lowercase query: '{query}'")

    # One digit followed by three alphanumerics, delimited by word boundaries
    # so that fragments of longer words are never taken for an ID.
    pdb_id_token = r"\b([0-9][a-z0-9]{3})\b"
    sequence_patterns = [
        rf"\bsequence\s+of\s+pdb\s+id\s+{pdb_id_token}",
        rf"\bsequence\s+for\s+pdb\s+id\s+{pdb_id_token}",
        rf"\bget\s+sequence\s+{pdb_id_token}",
        rf"{pdb_id_token}\s+sequence\b",
    ]

    for i, pattern in enumerate(sequence_patterns):
        print(f"Trying pattern {i+1}: {pattern}")
        match = re.search(pattern, query)
        if match:
            pdb_id = match.group(1).upper()
            print(f"Match found! PDB ID: {pdb_id}")
            return {
                "type": "sequence",
                "pdb_id": pdb_id
            }

    print("No sequence pattern matched, treating as structure search")
    return {
        "type": "structure",
        "query": query
    }
413
 
414
def process_query(self, query):
    """Dispatch ``query`` to sequence retrieval or to structure search.

    The query is classified with ``self.analyze_query_type``. A "sequence"
    classification is answered by ``self.get_sequences_by_pdb_id`` using the
    extracted PDB ID; anything else is answered by ``self.search_pdb`` using
    the query text returned by the classifier.

    Returns:
        A dict with ``"type"`` (``"sequence"`` or ``"structure"``) and
        ``"results"`` (whatever the chosen method returned).
    """
    info = self.analyze_query_type(query)

    if info["type"] != "sequence":
        hits = self.search_pdb(info["query"])
        return {"type": "structure", "results": hits}

    chains = self.get_sequences_by_pdb_id(info["pdb_id"])
    return {"type": "sequence", "results": chains}
428
 
 
429
 
430
  def create_interactive_table(df):
431
  if df.empty:
432
  return go.Figure()
433
 
434
+ # Reorder columns
435
+ column_order = ['PDB ID', 'Resolution', 'Method', 'Title', 'Release Date']
436
+ df = df[column_order]
437
+
438
+ # Release Date ํ˜•์‹ ๋ณ€๊ฒฝ (YYYY-MM-DD)
439
+ df['Release Date'] = pd.to_datetime(df['Release Date']).dt.strftime('%Y-%m-%d')
440
+
441
  # Create interactive table
442
  table = go.Figure(data=[go.Table(
443
  header=dict(
444
  values=list(df.columns),
445
  fill_color='paleturquoise',
446
+ align='center', # ํ—ค๋” ์ค‘์•™ ์ •๋ ฌ
447
+ font=dict(size=16), # ํ—ค๋” ๊ธ€์ž ํฌ๊ธฐ ์ฆ๊ฐ€
448
  ),
449
  cells=dict(
450
+ values=[
451
+ [f'<a href="https://www.rcsb.org/structure/{cell}">{cell}</a>'
452
+ if i == 0 else cell
453
+ for cell in df[col]]
454
+ for i, col in enumerate(df.columns)
455
+ ],
456
+ align='center', # ์…€ ๋‚ด์šฉ ์ค‘์•™ ์ •๋ ฌ
457
+ font=dict(size=15), # ์…€ ๊ธ€์ž ํฌ๊ธฐ ์ฆ๊ฐ€
458
+ height=35 # ์…€ ๋†’์ด ์ฆ๊ฐ€
459
  ),
460
+ columnwidth=[80, 80, 100, 400, 100],
461
+ customdata=[['html'] * len(df) if i == 0 else [''] * len(df)
462
+ for i in range(len(df.columns))],
463
+ hoverlabel=dict(bgcolor='white')
464
  )])
465
 
466
  # Update table layout
467
  table.update_layout(
468
+ margin=dict(l=20, r=20, t=20, b=20),
469
+ height=450, # ํ…Œ์ด๋ธ” ์ „์ฒด ๋†’์ด ์ฆ๊ฐ€
470
  autosize=True
471
  )
472
 
 
476
  app_ui = ui.page_fluid(
477
  ui.tags.head(
478
  ui.tags.style("""
479
+ .container-fluid {
480
+ max-width: 1200px;
481
+ margin: 0 auto;
482
+ padding: 20px;
483
+ }
484
  .table a {
485
  color: #0d6efd;
486
  text-decoration: none;
 
489
  color: #0a58ca;
490
  text-decoration: underline;
491
  }
492
+ .shiny-input-container {
493
+ max-width: 100%;
494
+ margin: 0 auto;
495
+ }
496
+ #query {
497
+ height: 100px;
498
+ font-size: 16px;
499
+ padding: 15px;
500
+ width: 80%;
501
+ margin: 0 auto;
502
+ display: block;
503
+ }
504
+ .content-wrapper {
505
+ text-align: center;
506
+ max-width: 1000px;
507
+ margin: 0 auto;
508
+ }
509
+ .search-button {
510
+ margin: 20px 0;
511
+ }
512
+ h2, h4 {
513
+ text-align: center;
514
+ margin: 20px 0;
515
+ }
516
+ .example-box {
517
+ background-color: #f8f9fa;
518
+ border-radius: 8px;
519
+ padding: 20px;
520
+ margin: 20px auto;
521
+ width: 80%;
522
+ text-align: left;
523
+ }
524
+ .example-box p {
525
+ font-weight: bold;
526
+ margin-bottom: 10px;
527
+ padding-left: 20px;
528
+ }
529
+ .example-box ul {
530
+ margin: 0;
531
+ padding-left: 40px;
532
+ }
533
+ .example-box li {
534
+ word-wrap: break-word;
535
+ margin: 10px 0;
536
+ line-height: 1.5;
537
+ }
538
+ .query-label {
539
+ display: block;
540
+ text-align: left;
541
+ margin-bottom: 10px;
542
+ margin-left: 10%;
543
+ font-weight: bold;
544
+ }
545
+ .status-box {
546
+ background-color: #f8f9fa;
547
+ border-radius: 8px;
548
+ padding: 15px;
549
+ margin: 20px auto;
550
+ width: 80%;
551
+ text-align: left;
552
+ }
553
+ .status-label {
554
+ font-weight: bold;
555
+ margin-right: 10px;
556
+ }
557
+ .status-ready {
558
+ color: #198754; /* Bootstrap success color */
559
+ font-weight: bold;
560
+ }
561
+ .sequence-results {
562
+ width: 80%;
563
+ margin: 20px auto;
564
+ text-align: left;
565
+ font-family: monospace;
566
+ white-space: pre-wrap;
567
+ word-wrap: break-word;
568
+ background-color: #f8f9fa;
569
+ border-radius: 8px;
570
+ padding: 20px;
571
+ overflow-x: hidden;
572
+ }
573
+ .sequence-text {
574
+ word-break: break-all;
575
+ margin: 10px 0;
576
+ line-height: 1.5;
577
+ }
578
+ .status-spinner {
579
+ display: none;
580
+ margin-left: 10px;
581
+ vertical-align: middle;
582
+ }
583
+ .status-spinner.active {
584
+ display: inline-block;
585
+ }
586
  """)
587
  ),
588
+ ui.div(
589
+ {"class": "content-wrapper"},
590
+ ui.h2("Advanced PDB Structure Search Tool"),
591
+ ui.row(
592
+ ui.column(12,
593
+ ui.tags.label(
594
+ "Search Query",
595
+ {"class": "query-label", "for": "query"}
596
+ ),
597
+ ui.input_text(
598
+ "query",
599
+ "",
600
+ value="Human insulin",
601
+ width="100%"
602
+ ),
603
+ )
604
+ ),
605
+ ui.row(
606
+ ui.column(12,
607
+ ui.div(
608
+ {"class": "example-box"},
609
+ ui.p("Example queries:"),
610
+ ui.tags.ul(
611
+ ui.tags.li("Human hemoglobin C resolution better than 2.5Å"),
612
+ ui.tags.li("Find structures containing sequence MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL"),
613
+ ui.tags.li("Sequence of PDB ID 8ET6"),
614
+ ui.tags.li("Get sequence 7BZ5")
615
+ )
616
+ )
617
+ )
618
+ ),
619
+ ui.row(
620
+ ui.column(12,
621
+ ui.div(
622
+ {"class": "search-button"},
623
+ ui.input_action_button("search", "Search",
624
+ class_="btn-primary btn-lg") # ๋ฒ„ํŠผ ํฌ๊ธฐ ์ฆ๊ฐ€
625
+ )
626
+ )
627
+ ),
628
+ ui.row(
629
+ ui.column(12,
630
+ ui.h4("Search Parameters:"),
631
+ ui.div(
632
+ {"class": "status-box"},
633
+ ui.tags.span("Status: ", class_="status-label"),
634
+ ui.output_text("search_status", inline=True),
635
+ ui.tags.div(
636
+ {"class": "status-spinner"},
637
+ ui.tags.i({"class": "fas fa-spinner fa-spin"})
638
+ )
639
+ )
640
+ )
641
+ ),
642
+ ui.row(
643
+ ui.column(12,
644
+ ui.h4("Top 10 Results:"),
645
+ output_widget("results_table"),
646
+ ui.download_button("download", "Download Results",
647
+ class_="btn btn-info btn-lg") # ๋‹ค์šด๋กœ๋“œ ๋ฒ„ํŠผ ์Šคํƒ€์ผ ๊ฐœ์„ 
648
+ )
649
+ ),
650
+ ui.row(
651
+ ui.column(12,
652
+ ui.div(
653
+ {"class": "sequence-results", "id": "sequence-results"},
654
+ ui.h4("Sequences:"),
655
+ ui.output_text("sequence_output")
656
+ )
657
+ )
658
  )
659
  )
660
  )
661
 
662
def server(input, output, session):
    """Wire the reactive state and the outputs of the search UI.

    State:
        * ``results_store`` holds the last ``process_query`` result, a dict
          with ``"type"`` (``None`` before the first search) and
          ``"results"``.
        * ``status_store`` holds the text shown by the ``search_status``
          output.

    All outputs are registered once, at this level, and read
    ``results_store`` reactively. Previously ``results_table`` was defined
    inside the search effect and closed over a ``df`` variable that only
    existed after a structure search, so a sequence query raised
    ``NameError`` when the table rendered.
    """
    assistant = PDBSearchAssistant()
    results_store = reactive.Value({"type": None, "results": []})
    status_store = reactive.Value("Ready")

    @reactive.Effect
    @reactive.event(input.search)
    def _():
        # NOTE(review): this effect is synchronous, so the intermediate
        # "Searching..." value is presumably overwritten before the UI is
        # flushed -- confirm whether it is ever visible to the user.
        status_store.set("Searching...")

        query_results = assistant.process_query(input.query())
        results_store.set(query_results)

        if query_results["type"] == "sequence" and not query_results["results"]:
            status_store.set("No sequences found")
        else:
            status_store.set("Ready")

    @output
    @render_widget
    def results_table():
        current_results = results_store.get()
        if current_results["type"] != "structure":
            # Nothing tabular to show (no search yet, or a sequence query).
            return create_interactive_table(pd.DataFrame())
        return create_interactive_table(pd.DataFrame(current_results["results"]))

    @output
    @render.text
    def search_status():
        return status_store.get()

    @output
    @render.download(filename="pdb_search_results.csv")
    def download():
        # A download handler must *yield* its content; a returned string is
        # interpreted by Shiny as the path of a file on disk.
        df = pd.DataFrame(results_store.get()["results"])
        yield df.to_csv(index=False)

    @output
    @render.text
    def sequence_output():
        current_results = results_store.get()
        if current_results["type"] != "sequence":
            return ""

        sequences = current_results["results"]
        if not sequences:
            return "No sequences found"

        output_text = []
        for seq in sequences:
            output_text.append(f"\nChain {seq['chain_id']} (Entity {seq['entity_id']}):")
            output_text.append(f"Description: {seq['description']}")
            output_text.append(f"Length: {seq['length']} residues")
            output_text.append("Sequence:")

            # Wrap the sequence at 60 characters per line.
            sequence = seq['sequence']
            output_text.append(
                '\n'.join(sequence[i:i + 60] for i in range(0, len(sequence), 60))
            )
            output_text.append("-" * 60)

        return "\n".join(output_text)
732
+
733
  app = App(app_ui, server)
734
 
735
  if __name__ == "__main__":
736
  import nest_asyncio
737
  nest_asyncio.apply()
738
+ app.run(host="0.0.0.0", port=7862)