lkjjj26 committed on
Commit
084c5d8
·
1 Parent(s): 8edecbd

update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -18
app.py CHANGED
@@ -174,34 +174,37 @@ class PDBSearchAssistant:
174
  # Clean and parse LLM response
175
  for line in response.split('\n'):
176
  if 'Resolution:' in line:
177
- value = line.split('Resolution:')[1].strip()
178
  if value.lower() not in ['none', 'n/a'] and has_resolution_query:
179
- try:
180
- # Extract just the number
181
- res_value = ''.join(c for c in value if c.isdigit() or c == '.')
182
- resolution_limit = float(res_value)
183
- except ValueError:
184
- pass
185
- elif 'Method:' in line:
 
 
 
186
  value = line.split('Method:')[1].strip()
187
  if value.lower() not in ['none', 'n/a']:
188
  method = value.upper()
189
- elif 'Sequence:' in line:
190
  value = line.split('Sequence:')[1].strip()
191
  if value.lower() not in ['none', 'n/a']:
192
  sequence = value
193
- elif 'PDB_ID:' in line:
194
- value = line.split('PDB_ID:')[1].strip()
195
  if value.lower() not in ['none', 'n/a']:
196
  pdb_id = value
197
- elif 'Organism:' in line:
198
- value = line.split('Organism:')[1].strip()
199
  if value.lower() not in ['none', 'n/a']:
200
  organism = value
201
 
202
  # Build search query
203
  queries = []
204
-
205
  # Check if the query contains a protein sequence pattern
206
  # Check for amino acid sequence (minimum 25 residues)
207
  query_words = query.split()
@@ -295,7 +298,7 @@ class PDBSearchAssistant:
295
 
296
  # Combine queries with AND operator
297
  if queries:
298
- final_query = queries[0]
299
  for q in queries[1:]:
300
  final_query = final_query & q
301
 
@@ -459,7 +462,17 @@ class PDBSearchAssistant:
459
  def process_query(self, query):
460
  """Process query and return results"""
461
  try:
462
- # Get search parameters from LLM
 
 
 
 
 
 
 
 
 
 
463
  formatted_prompt = self.prompt_template.format(query=query)
464
  response = self.pipe(formatted_prompt)[0]['generated_text']
465
  print("Generated parameters:", response)
@@ -478,8 +491,6 @@ class PDBSearchAssistant:
478
  is_sequence_query = any(keyword in query.lower() for keyword in sequence_keywords)
479
 
480
  if is_sequence_query and pdb_id:
481
- # Get sequences for the PDB ID
482
-
483
  sequences = self.get_sequences_by_pdb_id(pdb_id)
484
  return {
485
  "type": "sequence",
 
174
  # Clean and parse LLM response
175
  for line in response.split('\n'):
176
  if 'Resolution:' in line:
177
+ value = line.split('Resolution:')[1].strip().split(" ")[0].strip()
178
  if value.lower() not in ['none', 'n/a'] and has_resolution_query:
179
+ resolution_limit = float(value)
180
+
181
+ # try:
182
+ # # Extract just the number
183
+ # res_value = ''.join(c for c in value if c.isdigit() or c == '.')
184
+ # resolution_limit = float(res_value)
185
+ # except ValueError:
186
+ # pass
187
+
188
+ if 'Method:' in line:
189
  value = line.split('Method:')[1].strip()
190
  if value.lower() not in ['none', 'n/a']:
191
  method = value.upper()
192
+ if 'Sequence:' in line:
193
  value = line.split('Sequence:')[1].strip()
194
  if value.lower() not in ['none', 'n/a']:
195
  sequence = value
196
+ if 'PDB_ID:' in line:
197
+ value = line.split('PDB_ID:')[1].strip().split(" ")[0].strip()
198
  if value.lower() not in ['none', 'n/a']:
199
  pdb_id = value
200
+ if 'Protein:' in line:
201
+ value = line.split('Protein:')[1].split('Resolution:')[0].strip()
202
  if value.lower() not in ['none', 'n/a']:
203
  organism = value
204
 
205
  # Build search query
206
  queries = []
207
+ print(organism)
208
  # Check if the query contains a protein sequence pattern
209
  # Check for amino acid sequence (minimum 25 residues)
210
  query_words = query.split()
 
298
 
299
  # Combine queries with AND operator
300
  if queries:
301
+ final_query = queries[1]
302
  for q in queries[1:]:
303
  final_query = final_query & q
304
 
 
462
  def process_query(self, query):
463
  """Process query and return results"""
464
  try:
465
+ # First check if the query is just a PDB ID
466
+ query_cleaned = query.strip().upper()
467
+ if re.match(r'^[0-9A-Za-z]{4}$', query_cleaned):
468
+ # Direct PDB ID query
469
+ sequences = self.get_sequences_by_pdb_id(query_cleaned)
470
+ return {
471
+ "type": "sequence",
472
+ "results": sequences
473
+ }
474
+
475
+ # If not a direct PDB ID, proceed with LLM processing
476
  formatted_prompt = self.prompt_template.format(query=query)
477
  response = self.pipe(formatted_prompt)[0]['generated_text']
478
  print("Generated parameters:", response)
 
491
  is_sequence_query = any(keyword in query.lower() for keyword in sequence_keywords)
492
 
493
  if is_sequence_query and pdb_id:
 
 
494
  sequences = self.get_sequences_by_pdb_id(pdb_id)
495
  return {
496
  "type": "sequence",