Spaces:

lkjjj26
/

query

Sleeping

App Files Files Community

lkjjj26 committed on Jan 10

Commit

084c5d8

1 Parent(s): 8edecbd

update app.py

Browse files

Files changed (1) hide show

app.py +29 -18

app.py CHANGED Viewed

@@ -174,34 +174,37 @@ class PDBSearchAssistant:
             # Clean and parse LLM response
             for line in response.split('\n'):
                 if 'Resolution:' in line:
-                    value = line.split('Resolution:')[1].strip()
                     if value.lower() not in ['none', 'n/a'] and has_resolution_query:
-                        try:
-                            # Extract just the number
-                            res_value = ''.join(c for c in value if c.isdigit() or c == '.')
-                            resolution_limit = float(res_value)
-                        except ValueError:
-                            pass
-                elif 'Method:' in line:
                     value = line.split('Method:')[1].strip()
                     if value.lower() not in ['none', 'n/a']:
                         method = value.upper()
-                elif 'Sequence:' in line:
                     value = line.split('Sequence:')[1].strip()
                     if value.lower() not in ['none', 'n/a']:
                         sequence = value
-                elif 'PDB_ID:' in line:
-                    value = line.split('PDB_ID:')[1].strip()
                     if value.lower() not in ['none', 'n/a']:
                         pdb_id = value
-                elif 'Organism:' in line:
-                    value = line.split('Organism:')[1].strip()
                     if value.lower() not in ['none', 'n/a']:
                         organism = value
             # Build search query
             queries = []
             # Check if the query contains a protein sequence pattern
             # Check for amino acid sequence (minimum 25 residues)
             query_words = query.split()
@@ -295,7 +298,7 @@ class PDBSearchAssistant:
             # Combine queries with AND operator
             if queries:
-                final_query = queries[0]
                 for q in queries[1:]:
                     final_query = final_query & q
@@ -459,7 +462,17 @@ class PDBSearchAssistant:
     def process_query(self, query):
         """Process query and return results"""
         try:
-            # Get search parameters from LLM
             formatted_prompt = self.prompt_template.format(query=query)
             response = self.pipe(formatted_prompt)[0]['generated_text']
             print("Generated parameters:", response)
@@ -478,8 +491,6 @@ class PDBSearchAssistant:
             is_sequence_query = any(keyword in query.lower() for keyword in sequence_keywords)
             if is_sequence_query and pdb_id:
-                # Get sequences for the PDB ID
                 sequences = self.get_sequences_by_pdb_id(pdb_id)
                 return {
                     "type": "sequence",

             # Clean and parse LLM response
             for line in response.split('\n'):
                 if 'Resolution:' in line:
+                    value = line.split('Resolution:')[1].strip().split(" ")[0].strip()
                     if value.lower() not in ['none', 'n/a'] and has_resolution_query:
+                        resolution_limit = float(value)
+                        # try:
+                        #     # Extract just the number
+                        #     res_value = ''.join(c for c in value if c.isdigit() or c == '.')
+                        #     resolution_limit = float(res_value)
+                        # except ValueError:
+                        #     pass
+                if 'Method:' in line:
                     value = line.split('Method:')[1].strip()
                     if value.lower() not in ['none', 'n/a']:
                         method = value.upper()
+                if 'Sequence:' in line:
                     value = line.split('Sequence:')[1].strip()
                     if value.lower() not in ['none', 'n/a']:
                         sequence = value
+                if 'PDB_ID:' in line:
+                    value = line.split('PDB_ID:')[1].strip().split(" ")[0].strip()
                     if value.lower() not in ['none', 'n/a']:
                         pdb_id = value
+                if 'Protein:' in line:
+                    value = line.split('Protein:')[1].split('Resolution:')[0].strip()
                     if value.lower() not in ['none', 'n/a']:
                         organism = value
             # Build search query
             queries = []
+            print(organism)
             # Check if the query contains a protein sequence pattern
             # Check for amino acid sequence (minimum 25 residues)
             query_words = query.split()
             # Combine queries with AND operator
             if queries:
+                final_query = queries[1]
                 for q in queries[1:]:
                     final_query = final_query & q
     def process_query(self, query):
         """Process query and return results"""
         try:
+            # First check if the query is just a PDB ID
+            query_cleaned = query.strip().upper()
+            if re.match(r'^[0-9A-Za-z]{4}$', query_cleaned):
+                # Direct PDB ID query
+                sequences = self.get_sequences_by_pdb_id(query_cleaned)
+                return {
+                    "type": "sequence",
+                    "results": sequences
+                }
+            # If not a direct PDB ID, proceed with LLM processing
             formatted_prompt = self.prompt_template.format(query=query)
             response = self.pipe(formatted_prompt)[0]['generated_text']
             print("Generated parameters:", response)
             is_sequence_query = any(keyword in query.lower() for keyword in sequence_keywords)
             if is_sequence_query and pdb_id:
                 sequences = self.get_sequences_by_pdb_id(pdb_id)
                 return {
                     "type": "sequence",