|
from transformers import pipeline |
|
from rcsbsearchapi import TextQuery, AttributeQuery, Query |
|
from rcsbsearchapi.search import Sort, SequenceQuery |
|
import os |
|
from dotenv import load_dotenv |
|
from shiny import App, render, ui, reactive |
|
import pandas as pd |
|
import warnings |
|
import re |
|
from UniprotKB_P_Sequence_RCSB_API_test import ProteinQuery, ProteinSearchEngine |
|
import plotly.graph_objects as go |
|
from shinywidgets import output_widget, render_widget |
|
import requests |
|
import asyncio |
|
from Bio import PDB |
|
from Bio.PDB.PDBList import PDBList |
|
from Bio.PDB.Polypeptide import protein_letters_3to1 |
|
import shutil |
|
# Suppress every warning category for the whole process.
warnings.simplefilter('ignore')

# Pull settings from a local .env file into the process environment, if present.
load_dotenv()
|
|
|
|
|
|
|
|
|
class PDBSearchAssistant:
    """Natural-language front end for RCSB PDB searches.

    A text2text model turns a free-form query into labelled parameters
    (protein, organism, resolution, sequence, PDB ID, method). Those
    parameters are combined into an ``rcsbsearchapi`` query and the matching
    entries are summarised from the RCSB data REST API.
    """

    # Labels the prompt asks the model to emit.
    _PARAMETER_KEYS = ("Protein", "Organism", "Resolution", "Sequence", "PDB_ID", "Method")
    _KEY_ALTERNATION = "|".join(_PARAMETER_KEYS)
    # A value runs until the next label, the end of the line, or the end of
    # the text, so the parser copes with one field per line as well as with
    # all fields emitted on a single line.
    _PARAMETER_PATTERN = re.compile(
        r"(?P<key>" + _KEY_ALTERNATION + r"):[ \t]*(?P<value>[^\n]*?)[ \t]*"
        r"(?=(?:" + _KEY_ALTERNATION + r"):|\n|$)"
    )
    # Values the model uses for "not mentioned".
    _EMPTY_VALUES = ("none", "n/a")

    # Words that signal a resolution constraint, mapped to the comparison
    # direction they imply (None = signals a constraint, no direction).
    # Iteration order matters: the last matching directional term wins.
    _RESOLUTION_TERMS = {
        'better': 'less',
        'best': 'less',
        'highest': 'less',
        'good': 'less',
        'fine': 'less',
        'worse': 'greater',
        'worst': 'greater',
        'lowest': 'greater',
        'poor': 'greater',
        'resolution': None,
        'å': None,
        'angstrom': None,
        'angstroms': None,
        'than': None,
        'under': 'less',
        'below': 'less',
        'above': 'greater',
        'over': 'greater',
    }
    # Any number in the query is accepted as a hint that a resolution was given.
    _ANY_NUMBER = re.compile(r'\d+\.?\d*\s*å?')
    # A whole token that is just a number with an optional Ångström unit.
    _NUMBER_TOKEN = re.compile(r'\d+(?:\.\d+)?(?:å|a|angstroms?)?')
    # Punctuation trimmed from tokens before they are compared.
    _PUNCTUATION = '.,;:!?()[]{}"\''

    # Shorthand used in the prompt -> names expected by the ``exptl.method``
    # exact-match filter. NOTE(review): confirm against the RCSB attribute list.
    _METHOD_ALIASES = {
        'X-RAY': 'X-RAY DIFFRACTION',
        'XRAY': 'X-RAY DIFFRACTION',
        'X-RAY CRYSTALLOGRAPHY': 'X-RAY DIFFRACTION',
        'EM': 'ELECTRON MICROSCOPY',
        'CRYO-EM': 'ELECTRON MICROSCOPY',
        'CRYOEM': 'ELECTRON MICROSCOPY',
        'NMR': 'SOLUTION NMR',
    }

    _AMINO_ACIDS = 'ACDEFGHIKLMNPQRSTVWY'
    _MIN_SEQUENCE_LENGTH = 25
    _MAX_RESULTS = 10
    # Seconds to wait on the RCSB REST API before giving up.
    _REQUEST_TIMEOUT = 30

    def __init__(self, model_name="google/flan-t5-large"):
        """Load the text2text model and prepare the PDB download directory.

        Args:
            model_name: Model identifier handed to ``transformers.pipeline``.
        """
        self.pipe = pipeline(
            "text2text-generation",
            model=model_name,
            max_new_tokens=512,
            temperature=0.3,
            torch_dtype="auto",
            device="cpu",
        )

        # Few-shot prompt; ``{query}`` is the only ``str.format`` placeholder.
        self.prompt_template = """
        Extract specific search parameters from the protein-related query:
        1. Protein name or type
        2. Resolution cutoff (in Å)
        3. Protein sequence information
        4. Specific PDB ID
        5. Experimental method (X-RAY, EM, NMR)
        6. Organism/Species information

        Format:
        Protein: [protein name or type]
        Organism: [organism/species if mentioned]
        Resolution: [maximum resolution in Å, if mentioned]
        Sequence: [any sequence mentioned]
        PDB_ID: [specific PDB ID if mentioned]
        Method: [experimental method if mentioned]

        Examples:
        Query: "Find human insulin structures with X-ray better than 2.5Å resolution"
        Protein: insulin
        Organism: Homo sapiens
        Resolution: 2.5
        Sequence: none
        PDB_ID: none
        Method: X-RAY

        Query: "Find structures containing sequence MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL"
        Protein: none
        Organism: none
        Resolution: none
        Sequence: MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL
        PDB_ID: none
        Method: none

        Query: "Get sequence of PDB ID 8ET6"
        Protein: none
        Organism: none
        Resolution: none
        Sequence: none
        PDB_ID: 8ET6
        Method: none

        Query: "Find mouse lysozyme structures"
        Protein: lysozyme
        Organism: Mus musculus
        Resolution: none
        Sequence: none
        PDB_ID: none
        Method: none

        Query: "Show me E. coli protein structures solved by Cryo-EM"
        Protein: none
        Organism: Escherichia coli
        Resolution: none
        Sequence: none
        PDB_ID: none
        Method: EM

        Query: "Find S. cerevisiae structures with resolution better than 1.8Å"
        Protein: none
        Organism: Saccharomyces cerevisiae
        Resolution: 1.8
        Sequence: none
        PDB_ID: none
        Method: none

        Query: "Sequence of 7BZ5"
        Protein: none
        Organism: none
        Resolution: none
        Sequence: none
        PDB_ID: 7BZ5
        Method: none



        Now analyze:
        Query: {query}
        """

        self.pdb_dir = "pdb_tmp"
        os.makedirs(self.pdb_dir, exist_ok=True)
        self.pdbl = PDBList()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _generate_parameters(self, query):
        """Run the model on *query* and return its raw parameter text."""
        formatted_prompt = self.prompt_template.format(query=query)
        response = self.pipe(formatted_prompt)[0]['generated_text']
        print("Generated parameters:", response)
        return response

    @classmethod
    def _parse_parameters(cls, text):
        """Return ``{label: value}`` for every labelled field with a real value.

        Fields whose value is empty, ``none`` or ``n/a`` are left out. When a
        label occurs more than once, the first usable value is kept.
        """
        parameters = {}
        for match in cls._PARAMETER_PATTERN.finditer(text):
            value = match.group("value").strip()
            if value and value.lower() not in cls._EMPTY_VALUES:
                parameters.setdefault(match.group("key"), value)
        return parameters

    @staticmethod
    def _parse_resolution(value):
        """Return the number embedded in *value* as a float, or ``None``."""
        digits = ''.join(c for c in value if c.isdigit() or c == '.')
        try:
            return float(digits)
        except ValueError:
            return None

    @classmethod
    def _normalize_method(cls, value):
        """Upper-case *value* and expand known shorthand; ``None`` if empty."""
        if not value:
            return None
        method = value.upper()
        return cls._METHOD_ALIASES.get(method, method)

    @classmethod
    def _resolution_intent(cls, query):
        """Return ``(has_resolution_query, direction)`` for *query*.

        Terms are matched as whole words, so "recover" does not count as
        "over". ``direction`` is ``"less"`` or ``"greater"`` and defaults to
        ``"less"``.
        """
        query_lower = query.lower()
        words = {token.strip(cls._PUNCTUATION) for token in query_lower.split()}

        has_resolution_query = False
        direction = "less"
        for term, term_direction in cls._RESOLUTION_TERMS.items():
            if term in words:
                has_resolution_query = True
                if term_direction:
                    direction = term_direction

        if cls._ANY_NUMBER.search(query_lower):
            has_resolution_query = True
        return has_resolution_query, direction

    @classmethod
    def _clean_text_query(cls, query):
        """Lower-case *query* and drop resolution words and bare numbers.

        Only whole tokens are removed; words that merely contain a term
        (e.g. "ethanol" contains "than") are left intact.
        """
        kept = []
        for token in query.lower().split():
            core = token.strip(cls._PUNCTUATION)
            if core in cls._RESOLUTION_TERMS or cls._NUMBER_TOKEN.fullmatch(core):
                continue
            kept.append(token)
        return ' '.join(kept)

    @classmethod
    def _find_sequence_in_query(cls, query):
        """Return the first word of *query* that looks like a protein sequence.

        A word qualifies when it has at least 25 characters, uses only the 20
        standard amino-acid letters and is more than 80% upper case.
        Surrounding quotes and punctuation are ignored.
        """
        for word in query.split():
            candidate = word.strip(cls._PUNCTUATION)
            if (len(candidate) >= cls._MIN_SEQUENCE_LENGTH
                    and all(c in cls._AMINO_ACIDS for c in candidate.upper())
                    and sum(c.isupper() for c in candidate) / len(candidate) > 0.8):
                return candidate
        return None

    @classmethod
    def _fetch_entry(cls, pdb_id):
        """Fetch entry metadata from the RCSB data API; ``None`` on non-200."""
        structure_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
        # A timeout keeps one stalled request from blocking the search forever.
        response = requests.get(structure_url, timeout=cls._REQUEST_TIMEOUT)
        if response.status_code != 200:
            return None
        return response.json()

    @staticmethod
    def _entry_fields(structure_data):
        """Extract display fields from entry metadata, tolerating gaps.

        Missing, ``None`` or empty sections fall back to ``0.0`` (resolution),
        ``'Unknown'`` (method) and ``'N/A'`` (title, release date).
        """
        entry_info = structure_data.get('rcsb_entry_info') or {}
        resolutions = entry_info.get('resolution_combined') or [0.0]
        resolution = resolutions[0] if resolutions[0] is not None else 0.0
        experiments = structure_data.get('exptl') or [{}]
        return {
            'resolution': resolution,
            'method': experiments[0].get('method', 'Unknown'),
            'title': (structure_data.get('struct') or {}).get('title', 'N/A'),
            'release_date': (structure_data.get('rcsb_accession_info') or {}).get(
                'initial_release_date', 'N/A'),
        }

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def search_pdb(self, query, parameters_text=None):
        """Search RCSB PDB for structures matching a natural-language query.

        Args:
            query: Free-form user query.
            parameters_text: Model output already generated for *query*. When
                omitted, the model is run here.

        Returns:
            Up to 10 dicts with the keys ``'PDB ID'``, ``'Resolution'``,
            ``'Method'``, ``'Title'`` and ``'Release Date'``. An empty list
            is returned when nothing matches or any error occurs (errors are
            printed, not raised).
        """
        try:
            if parameters_text is None:
                parameters_text = self._generate_parameters(query)
            params = self._parse_parameters(parameters_text)

            has_resolution_query, resolution_direction = self._resolution_intent(query)

            # The model's resolution is only trusted when the query itself
            # hints at one.
            resolution_limit = None
            if has_resolution_query and 'Resolution' in params:
                resolution_limit = self._parse_resolution(params['Resolution'])

            method = self._normalize_method(params.get('Method'))
            pdb_id = params.get('PDB_ID')
            organism = params.get('Organism')
            # A sequence found verbatim in the query takes precedence over
            # the model's copy of it.
            sequence = self._find_sequence_in_query(query) or params.get('Sequence')

            queries = []

            if sequence and len(sequence) < self._MIN_SEQUENCE_LENGTH:
                print("Warning: Sequence must be at least 25 residues long. Skipping sequence search.")
                sequence = None

            if sequence:
                print(f"Adding sequence search with identity 100% for sequence: {sequence}")
                sequence_query = SequenceQuery(
                    sequence,
                    identity_cutoff=1.0,
                    evalue_cutoff=1,
                    sequence_type="protein",
                )
                queries.append(sequence_query)
            else:
                if has_resolution_query:
                    clean_query = self._clean_text_query(query)
                else:
                    clean_query = ' '.join(query.lower().split())

                print("Cleaned query:", clean_query)

                if clean_query:
                    text_query = AttributeQuery(
                        attribute="struct.title",
                        operator="contains_phrase",
                        value=clean_query,
                    )
                    queries.append(text_query)

            if resolution_limit and has_resolution_query:
                operator = "less_or_equal" if resolution_direction == "less" else "greater_or_equal"
                print(f"Adding resolution filter: {operator} {resolution_limit}Å")
                resolution_query = AttributeQuery(
                    attribute="rcsb_entry_info.resolution_combined",
                    operator=operator,
                    value=resolution_limit,
                )
                queries.append(resolution_query)

            if pdb_id:
                print(f"Searching for specific PDB ID: {pdb_id}")
                id_query = AttributeQuery(
                    attribute="rcsb_id",
                    operator="exact_match",
                    value=pdb_id.upper(),
                )
                # An explicit ID replaces the text/sequence/resolution criteria.
                queries = [id_query]

            if method:
                print(f"Adding experimental method filter: {method}")
                method_query = AttributeQuery(
                    attribute="exptl.method",
                    operator="exact_match",
                    value=method,
                )
                queries.append(method_query)

            if organism:
                print(f"Adding organism filter: {organism}")
                organism_query = AttributeQuery(
                    attribute="rcsb_entity_source_organism.taxonomy_lineage.name",
                    operator="exact_match",
                    value=organism,
                )
                queries.append(organism_query)

            if not queries:
                return []

            final_query = queries[0]
            for extra_query in queries[1:]:
                final_query = final_query & extra_query

            print("Final query:", final_query)

            session = final_query.exec()
            results = []

            try:
                for entry in session:
                    try:
                        if isinstance(entry, dict):
                            entry_id = entry.get('identifier')
                        elif hasattr(entry, 'identifier'):
                            entry_id = entry.identifier
                        else:
                            entry_id = str(entry)

                        # Validate before upper-casing so a missing
                        # identifier is skipped instead of raising.
                        if not entry_id or len(entry_id) != 4:
                            continue
                        entry_id = entry_id.upper()

                        structure_data = self._fetch_entry(entry_id)
                        if structure_data is None:
                            continue

                        fields = self._entry_fields(structure_data)
                        results.append({
                            'PDB ID': entry_id,
                            'Resolution': f"{fields['resolution']:.2f}Å",
                            'Method': fields['method'],
                            'Title': fields['title'],
                            'Release Date': fields['release_date'],
                        })

                        if len(results) >= self._MAX_RESULTS:
                            break

                    except Exception as e:
                        # One bad entry must not abort the whole result list.
                        print(f"Error processing entry: {str(e)}")
                        continue

            except Exception as e:
                print(f"Error processing results: {str(e)}")
                print(f"Error type: {type(e)}")

            print(f"Found {len(results)} structures")
            return results

        except Exception as e:
            print(f"Error during search: {str(e)}")
            print(f"Error type: {type(e)}")
            return []

    def get_sequences_by_pdb_id(self, pdb_id):
        """Get sequences for all chains in a PDB structure using Biopython.

        The PDB file is downloaded into ``self.pdb_dir``, parsed, and removed
        again whether or not parsing succeeds. Only the first model is read,
        so multi-model entries list each chain once.

        Args:
            pdb_id: PDB identifier of the entry.

        Returns:
            One dict per chain that contains standard amino acids, with the
            keys ``chain_id``, ``entity_id``, ``description``, ``sequence``,
            ``length``, ``resolution``, ``method`` and ``release_date``. An
            empty list is returned on any failure (errors are printed).
        """
        pdb_path = None
        try:
            pdb_path = self.pdbl.retrieve_pdb_file(
                pdb_id,
                pdir=self.pdb_dir,
                file_format="pdb",
            )

            if not pdb_path or not os.path.exists(pdb_path):
                print(f"Failed to download PDB file for {pdb_id}")
                return []

            parser = PDB.PDBParser(QUIET=True)
            structure = parser.get_structure(pdb_id, pdb_path)

            fields = self._entry_fields(self._fetch_entry(pdb_id) or {})

            first_model = next(iter(structure), None)
            if first_model is None:
                return []

            sequences = []
            for chain in first_model:
                sequence = ''.join(
                    protein_letters_3to1[residue.get_resname()]
                    for residue in chain
                    if PDB.is_aa(residue, standard=True)
                    and residue.get_resname() in protein_letters_3to1
                )
                if not sequence:
                    continue
                sequences.append({
                    'chain_id': chain.id,
                    # Placeholder: the entity is not resolved per chain here.
                    'entity_id': '1',
                    'description': fields['title'],
                    'sequence': sequence,
                    'length': len(sequence),
                    'resolution': fields['resolution'],
                    'method': fields['method'],
                    'release_date': fields['release_date'],
                })

            return sequences

        except Exception as e:
            print(f"Error getting sequences for PDB ID {pdb_id}: {str(e)}")
            return []

        finally:
            # Best-effort cleanup of the downloaded file on every exit path.
            if pdb_path and os.path.exists(pdb_path):
                try:
                    os.remove(pdb_path)
                except OSError:
                    pass

    def __del__(self):
        """Cleanup temporary directory on object destruction."""
        pdb_dir = getattr(self, 'pdb_dir', None)
        if pdb_dir:
            # Never raise from a finalizer; a missing directory is fine.
            shutil.rmtree(pdb_dir, ignore_errors=True)

    def process_query(self, query):
        """Process query and return results.

        A query that mentions "sequence"/"seq" and for which the model
        extracted a PDB ID returns that entry's chain sequences; every other
        query is run as a structure search. The model is invoked once and its
        output is shared with ``search_pdb``.

        Args:
            query: Free-form user query.

        Returns:
            ``{"type": "sequence" | "structure", "results": list}``. On any
            error the result is ``{"type": "structure", "results": []}``.
        """
        try:
            parameters_text = self._generate_parameters(query)
            pdb_id = self._parse_parameters(parameters_text).get('PDB_ID')

            sequence_keywords = ('sequence', 'seq')
            query_lower = query.lower()
            is_sequence_query = any(keyword in query_lower for keyword in sequence_keywords)

            if is_sequence_query and pdb_id:
                return {
                    "type": "sequence",
                    "results": self.get_sequences_by_pdb_id(pdb_id.upper()),
                }

            return {
                "type": "structure",
                "results": self.search_pdb(query, parameters_text),
            }

        except Exception as e:
            print(f"Error processing query: {str(e)}")
            return {"type": "structure", "results": []}
|
|
|
def pdbsummary(name):
    """Return a plain-text summary of structures found for *name*.

    Searches through ``ProteinSearchEngine`` with a maximum resolution of
    5.0 and lists, for each hit in result order, its PDB ID, resolution,
    method, title, release date, sequence length and sequence.

    Args:
        name: Value passed as the first argument of ``ProteinQuery``.

    Returns:
        The concatenated summary; an empty string when there are no hits.
    """
    engine = ProteinSearchEngine()
    protein_query = ProteinQuery(name, max_resolution=5.0)
    structures = engine.search(protein_query)

    # Collect the fragments and join once instead of growing a string.
    fragments = []
    for position, structure in enumerate(structures, 1):
        fragments.extend((
            f"\n{position}. PDB ID : {structure.pdb_id}\n",
            f"\nResolution : {structure.resolution:.2f} A \n",
            f"Method : {structure.method}\n Title : {structure.title}\n",
            f"Release Date : {structure.release_date}\n Sequence length: {len(structure.sequence)} aa\n",
            f" Sequence:\n {structure.sequence}\n",
        ))

    return "".join(fragments)
|
|
|
def create_interactive_table(df):
    """Build a Plotly table figure from structure search results.

    Args:
        df: DataFrame with the columns ``'PDB ID'``, ``'Resolution'``,
            ``'Method'``, ``'Title'`` and ``'Release Date'``. The caller's
            frame is not modified.

    Returns:
        A ``go.Figure`` holding one ``go.Table``. PDB IDs are rendered as
        links to the RCSB structure page. An empty figure is returned for an
        empty frame.
    """
    if df.empty:
        return go.Figure()

    column_order = ['PDB ID', 'Resolution', 'Method', 'Title', 'Release Date']
    # Work on a copy so the assignment below never touches the caller's frame.
    df = df[column_order].copy()

    # Unparseable dates (such as the 'N/A' placeholder) become 'N/A' instead
    # of raising.
    release_dates = pd.to_datetime(df['Release Date'], errors='coerce')
    df['Release Date'] = release_dates.dt.strftime('%Y-%m-%d').fillna('N/A')

    cell_values = []
    for position, column in enumerate(df.columns):
        if position == 0:
            # First column: wrap each PDB ID in a link to its RCSB page.
            cell_values.append([
                f'<a href="https://www.rcsb.org/structure/{cell}">{cell}</a>'
                for cell in df[column]
            ])
        else:
            cell_values.append(list(df[column]))

    table = go.Figure(data=[go.Table(
        header=dict(
            values=list(df.columns),
            fill_color='paleturquoise',
            align='center',
            font=dict(size=16),
        ),
        cells=dict(
            values=cell_values,
            align='center',
            font=dict(size=15),
            height=35,
        ),
        columnwidth=[80, 80, 100, 400, 100],
        customdata=[['html'] * len(df) if i == 0 else [''] * len(df)
                    for i in range(len(df.columns))],
        hoverlabel=dict(bgcolor='white'),
    )])

    table.update_layout(
        margin=dict(l=20, r=20, t=20, b=20),
        height=450,
        autosize=True,
    )

    return table
|
|
|
|
|
# Page layout: query box, example queries, search button, status line,
# results table with a download button, and the sequence output panel.
app_ui = ui.page_fluid(
    ui.tags.head(
        ui.tags.style("""
            .container-fluid {
                max-width: 1200px;
                margin: 0 auto;
                padding: 20px;
            }
            .table a {
                color: #0d6efd;
                text-decoration: none;
            }
            .table a:hover {
                color: #0a58ca;
                text-decoration: underline;
            }
            .shiny-input-container {
                max-width: 100%;
                margin: 0 auto;
            }
            #query {
                height: 150px;
                font-size: 16px;
                padding: 15px;
                width: 80%;
                margin: 0 auto;
                display: block;
                white-space: pre-wrap;
                word-wrap: break-word;
                resize: vertical;
                overflow-y: auto;
            }
            .content-wrapper {
                text-align: center;
                max-width: 1000px;
                margin: 0 auto;
            }
            .search-button {
                margin: 20px 0;
            }
            h2, h4 {
                text-align: center;
                margin: 20px 0;
            }
            .example-box {
                background-color: #f8f9fa;
                border-radius: 8px;
                padding: 20px;
                margin: 20px auto;
                width: 80%;
                text-align: left;
            }
            .example-box p {
                font-weight: bold;
                margin-bottom: 10px;
                padding-left: 20px;
            }
            .example-box ul {
                margin: 0;
                padding-left: 40px;
            }
            .example-box li {
                word-wrap: break-word;
                margin: 10px 0;
                line-height: 1.5;
            }
            .query-label {
                display: block;
                text-align: left;
                margin-bottom: 10px;
                margin-left: 10%;
                font-weight: bold;
            }
            .status-box {
                background-color: #f8f9fa;
                border-radius: 8px;
                padding: 15px;
                margin: 20px auto;
                width: 80%;
                text-align: left;
            }
            .status-label {
                font-weight: bold;
                margin-right: 10px;
            }
            .status-ready {
                color: #198754;  /* Bootstrap success color */
                font-weight: bold;
            }
            .sequence-results {
                width: 80%;
                margin: 20px auto;
                text-align: left;
                font-family: monospace;
                white-space: pre-wrap;
                word-wrap: break-word;
                background-color: #f8f9fa;
                border-radius: 8px;
                padding: 20px;
                overflow-x: hidden;
            }
            .sequence-text {
                word-break: break-all;
                margin: 10px 0;
                line-height: 1.5;
            }
            .status-spinner {
                display: none;
                margin-left: 10px;
                vertical-align: middle;
            }
            .status-spinner.active {
                display: inline-block;
            }
        """)
    ),
    ui.div(
        {"class": "content-wrapper"},
        ui.h2("Advanced PDB Structure Search Tool"),
        # Query input; the label is attached manually so it can be styled.
        ui.row(
            ui.column(
                12,
                ui.tags.label(
                    "Search Query",
                    {"class": "query-label", "for": "query"},
                ),
                ui.input_text_area(
                    "query",
                    "",
                    value="Human insulin",
                    width="100%",
                    resize="vertical",
                ),
            )
        ),
        # Example queries shown to the user.
        ui.row(
            ui.column(
                12,
                ui.div(
                    {"class": "example-box"},
                    ui.p("Example queries:"),
                    ui.tags.ul(
                        ui.tags.li("Human hemoglobin C resolution better than 2.5Å"),
                        ui.tags.li("Find structures containing sequence MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL"),
                        ui.tags.li("Sequence of PDB ID 8ET6"),
                    ),
                ),
            )
        ),
        ui.row(
            ui.column(
                12,
                ui.div(
                    {"class": "search-button"},
                    ui.input_action_button("search", "Search",
                                           class_="btn-primary btn-lg"),
                ),
            )
        ),
        # Status line fed by the `search_status` output.
        ui.row(
            ui.column(
                12,
                ui.h4("Search Parameters:"),
                ui.div(
                    {"class": "status-box"},
                    ui.tags.span("Status: ", class_="status-label"),
                    ui.output_text("search_status", inline=True),
                    ui.tags.div(
                        {"class": "status-spinner"},
                        ui.tags.i({"class": "fas fa-spinner fa-spin"}),
                    ),
                ),
            )
        ),
        # Structure results table and CSV download.
        ui.row(
            ui.column(
                12,
                ui.h4("Top 10 Results:"),
                output_widget("results_table"),
                ui.download_button("download", "Download Results",
                                   class_="btn btn-info btn-lg"),
            )
        ),
        # Chain sequences for "sequence of <PDB ID>" queries.
        ui.row(
            ui.column(
                12,
                ui.div(
                    {"class": "sequence-results", "id": "sequence-results"},
                    ui.h4("Sequences:"),
                    ui.output_text("sequence_output"),
                ),
            )
        ),
    ),
)
|
|
|
def server(input, output, session):
    """Shiny server function: run searches and feed the four outputs.

    ``results_store`` holds the latest ``process_query`` result and
    ``status_store`` the status text. Every output reads from these reactive
    values, so each one updates whenever a new search finishes.

    Args:
        input: Shiny inputs (``query`` text and ``search`` button).
        output: Shiny output registry.
        session: Shiny session object (unused here).
    """
    assistant = PDBSearchAssistant()
    results_store = reactive.Value({"type": None, "results": []})
    status_store = reactive.Value("Ready")

    @reactive.Effect
    @reactive.event(input.search)
    def _():
        # NOTE(review): this effect runs synchronously, so "Searching..." is
        # probably replaced before the browser ever sees it - confirm.
        status_store.set("Searching...")

        query_results = assistant.process_query(input.query())
        results_store.set(query_results)

        if query_results["results"]:
            status_store.set("Ready")
        elif query_results["type"] == "sequence":
            status_store.set("No sequences found")
        else:
            status_store.set("No structures found")

    @output
    @render_widget
    def results_table():
        # Registered once at server level and driven by results_store, so the
        # table is cleared for sequence results instead of going stale.
        current_results = results_store.get()
        if current_results["type"] == "structure":
            rows = current_results["results"]
        else:
            rows = []
        return create_interactive_table(pd.DataFrame(rows))

    @output
    @render.text
    def search_status():
        return status_store.get()

    @output
    @render.download(filename="pdb_search_results.csv")
    def download():
        current_results = results_store.get()
        df = pd.DataFrame(current_results["results"])
        # Yield the CSV text: a returned string would be treated as a path to
        # an existing file rather than as the file content.
        yield df.to_csv(index=False)

    @output
    @render.text
    def sequence_output():
        current_results = results_store.get()
        if current_results["type"] != "sequence":
            return ""

        sequences = current_results["results"]
        if not sequences:
            return "No sequences found"

        output_text = []
        for seq in sequences:
            output_text.append(f"\nChain {seq['chain_id']} (Entity {seq['entity_id']}):")
            output_text.append(f"Description: {seq['description']}")
            output_text.append(f"Length: {seq['length']} residues")
            output_text.append("Sequence:")

            # Wrap the sequence at 60 residues per line.
            sequence = seq['sequence']
            formatted_sequence = '\n'.join(
                sequence[i:i + 60] for i in range(0, len(sequence), 60)
            )
            output_text.append(formatted_sequence)
            output_text.append("-" * 60)

        return "\n".join(output_text)
|
|
|
# Shiny application object tying the page layout to the server logic.
app = App(app_ui, server)


if __name__ == "__main__":
    # Imported only for direct execution; applied before the app starts.
    import nest_asyncio

    nest_asyncio.apply()

    # Listen on all interfaces, port 7862.
    app.run(host="0.0.0.0", port=7862)