update app.py
Browse files
app.py
CHANGED
@@ -174,34 +174,37 @@ class PDBSearchAssistant:
|
|
174 |
# Clean and parse LLM response
|
175 |
for line in response.split('\n'):
|
176 |
if 'Resolution:' in line:
|
177 |
-
value = line.split('Resolution:')[1].strip()
|
178 |
if value.lower() not in ['none', 'n/a'] and has_resolution_query:
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
|
|
|
|
|
|
186 |
value = line.split('Method:')[1].strip()
|
187 |
if value.lower() not in ['none', 'n/a']:
|
188 |
method = value.upper()
|
189 |
-
|
190 |
value = line.split('Sequence:')[1].strip()
|
191 |
if value.lower() not in ['none', 'n/a']:
|
192 |
sequence = value
|
193 |
-
|
194 |
-
value = line.split('PDB_ID:')[1].strip()
|
195 |
if value.lower() not in ['none', 'n/a']:
|
196 |
pdb_id = value
|
197 |
-
|
198 |
-
value = line.split('
|
199 |
if value.lower() not in ['none', 'n/a']:
|
200 |
organism = value
|
201 |
|
202 |
# Build search query
|
203 |
queries = []
|
204 |
-
|
205 |
# Check if the query contains a protein sequence pattern
|
206 |
# Check for amino acid sequence (minimum 25 residues)
|
207 |
query_words = query.split()
|
@@ -295,7 +298,7 @@ class PDBSearchAssistant:
|
|
295 |
|
296 |
# Combine queries with AND operator
|
297 |
if queries:
|
298 |
-
final_query = queries[0]
|
299 |
for q in queries[1:]:
|
300 |
final_query = final_query & q
|
301 |
|
@@ -459,7 +462,17 @@ class PDBSearchAssistant:
|
|
459 |
def process_query(self, query):
|
460 |
"""Process query and return results"""
|
461 |
try:
|
462 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
463 |
formatted_prompt = self.prompt_template.format(query=query)
|
464 |
response = self.pipe(formatted_prompt)[0]['generated_text']
|
465 |
print("Generated parameters:", response)
|
@@ -478,8 +491,6 @@ class PDBSearchAssistant:
|
|
478 |
is_sequence_query = any(keyword in query.lower() for keyword in sequence_keywords)
|
479 |
|
480 |
if is_sequence_query and pdb_id:
|
481 |
-
# Get sequences for the PDB ID
|
482 |
-
|
483 |
sequences = self.get_sequences_by_pdb_id(pdb_id)
|
484 |
return {
|
485 |
"type": "sequence",
|
|
|
174 |
# Clean and parse LLM response
|
175 |
for line in response.split('\n'):
|
176 |
if 'Resolution:' in line:
|
177 |
+
value = line.split('Resolution:')[1].strip().split(" ")[0].strip()
|
178 |
if value.lower() not in ['none', 'n/a'] and has_resolution_query:
|
179 |
+
resolution_limit = float(value)
|
180 |
+
|
181 |
+
# try:
|
182 |
+
# # Extract just the number
|
183 |
+
# res_value = ''.join(c for c in value if c.isdigit() or c == '.')
|
184 |
+
# resolution_limit = float(res_value)
|
185 |
+
# except ValueError:
|
186 |
+
# pass
|
187 |
+
|
188 |
+
if 'Method:' in line:
|
189 |
value = line.split('Method:')[1].strip()
|
190 |
if value.lower() not in ['none', 'n/a']:
|
191 |
method = value.upper()
|
192 |
+
if 'Sequence:' in line:
|
193 |
value = line.split('Sequence:')[1].strip()
|
194 |
if value.lower() not in ['none', 'n/a']:
|
195 |
sequence = value
|
196 |
+
if 'PDB_ID:' in line:
|
197 |
+
value = line.split('PDB_ID:')[1].strip().split(" ")[0].strip()
|
198 |
if value.lower() not in ['none', 'n/a']:
|
199 |
pdb_id = value
|
200 |
+
if 'Protein:' in line:
|
201 |
+
value = line.split('Protein:')[1].split('Resolution:')[0].strip()
|
202 |
if value.lower() not in ['none', 'n/a']:
|
203 |
organism = value
|
204 |
|
205 |
# Build search query
|
206 |
queries = []
|
207 |
+
print(organism)
|
208 |
# Check if the query contains a protein sequence pattern
|
209 |
# Check for amino acid sequence (minimum 25 residues)
|
210 |
query_words = query.split()
|
|
|
298 |
|
299 |
# Combine queries with AND operator
|
300 |
if queries:
|
301 |
+
final_query = queries[1]
|
302 |
for q in queries[1:]:
|
303 |
final_query = final_query & q
|
304 |
|
|
|
462 |
def process_query(self, query):
|
463 |
"""Process query and return results"""
|
464 |
try:
|
465 |
+
# First check if the query is just a PDB ID
|
466 |
+
query_cleaned = query.strip().upper()
|
467 |
+
if re.match(r'^[0-9A-Za-z]{4}$', query_cleaned):
|
468 |
+
# Direct PDB ID query
|
469 |
+
sequences = self.get_sequences_by_pdb_id(query_cleaned)
|
470 |
+
return {
|
471 |
+
"type": "sequence",
|
472 |
+
"results": sequences
|
473 |
+
}
|
474 |
+
|
475 |
+
# If not a direct PDB ID, proceed with LLM processing
|
476 |
formatted_prompt = self.prompt_template.format(query=query)
|
477 |
response = self.pipe(formatted_prompt)[0]['generated_text']
|
478 |
print("Generated parameters:", response)
|
|
|
491 |
is_sequence_query = any(keyword in query.lower() for keyword in sequence_keywords)
|
492 |
|
493 |
if is_sequence_query and pdb_id:
|
|
|
|
|
494 |
sequences = self.get_sequences_by_pdb_id(pdb_id)
|
495 |
return {
|
496 |
"type": "sequence",
|