Spaces:

0edon
/

test

Paused

App Files Community

christopher committed on May 6

Commit

c708265

1 Parent(s): e113735

removed async

Browse files

Files changed (1) hide show

database/query_processor.py +14 -37

database/query_processor.py CHANGED Viewed

@@ -4,8 +4,6 @@ import numpy as np
 from models.LexRank import degree_centrality_scores
 import logging
 from datetime import datetime as dt
-import asyncio
-from concurrent.futures import ThreadPoolExecutor
 logger = logging.getLogger(__name__)
@@ -15,7 +13,6 @@ class QueryProcessor:
         self.summarization_model = summarization_model
         self.nlp_model = nlp_model
         self.db_service = db_service
-        self.executor = ThreadPoolExecutor(max_workers=4)  # For CPU-bound tasks
         logger.info("QueryProcessor initialized")
     async def process(
@@ -26,33 +23,33 @@ class QueryProcessor:
         end_date: Optional[str] = None
     ) -> Dict[str, Any]:
         try:
-            # Date handling (sync but fast)
             start_dt = self._parse_date(start_date) if start_date else None
             end_dt = self._parse_date(end_date) if end_date else None
-            # Async query processing
-            query_embedding = await self._async_encode(query)
             logger.debug(f"Generated embedding for query: {query[:50]}...")
-            # Entity extraction (sync but fast)
-            entities = await asyncio.to_thread(self.nlp_model.extract_entities, query)
             logger.debug(f"Extracted entities: {entities}")
-            # Async database search
             articles = await self._execute_semantic_search(
                 query_embedding,
                 start_dt,
                 end_dt,
                 topic,
-                [ent[0] for ent in entities]
             )
             if not articles:
                 logger.info("No articles found matching criteria")
                 return {"message": "No articles found", "articles": []}
-            # Async summary generation
-            summary_data = await self._async_generate_summary(articles)
             return {
                 "summary": summary_data["summary"],
@@ -94,35 +91,14 @@ class QueryProcessor:
             logger.error(f"Semantic search failed: {str(e)}")
             raise
-    async def _async_encode(self, text: str) -> List[float]:
-        """Run embedding in thread pool"""
-        loop = asyncio.get_running_loop()
-        return await loop.run_in_executor(
-            self.executor,
-            lambda: self.embedding_model.encode(text).tolist()
-        )
-    async def _async_generate_summary(self, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
-        """Run summary generation in thread pool"""
-        loop = asyncio.get_running_loop()
-        return await loop.run_in_executor(
-            self.executor,
-            lambda: self._sync_generate_summary(articles)
-        )
-    def _sync_generate_summary(self, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
-        """Synchronous version for thread pool execution"""
         try:
             # Extract and process content
             sentences = []
             for article in articles:
                 if content := article.get("content"):
-                    sentences.extend(
-                        asyncio.run_coroutine_threadsafe(
-                            asyncio.to_thread(self.nlp_model.tokenize_sentences, content),
-                            loop=asyncio.get_event_loop()
-                        ).result()
-                    )
             if not sentences:
                 logger.warning("No sentences available for summarization")
@@ -131,11 +107,12 @@ class QueryProcessor:
                     "key_sentences": []
                 }
-            # CPU-intensive operations
             embeddings = self.embedding_model.encode(sentences)
             similarity_matrix = np.inner(embeddings, embeddings)
             centrality_scores = degree_centrality_scores(similarity_matrix, threshold=None)
             top_indices = np.argsort(-centrality_scores)[:10]
             key_sentences = [sentences[idx].strip() for idx in top_indices]

 from models.LexRank import degree_centrality_scores
 import logging
 from datetime import datetime as dt
 logger = logging.getLogger(__name__)
         self.summarization_model = summarization_model
         self.nlp_model = nlp_model
         self.db_service = db_service
         logger.info("QueryProcessor initialized")
     async def process(
         end_date: Optional[str] = None
     ) -> Dict[str, Any]:
         try:
+            # Date handling
             start_dt = self._parse_date(start_date) if start_date else None
             end_dt = self._parse_date(end_date) if end_date else None
+            # Query processing
+            query_embedding = self.embedding_model.encode(query).tolist()
             logger.debug(f"Generated embedding for query: {query[:50]}...")
+            # Entity extraction
+            entities = self.nlp_model.extract_entities(query)
             logger.debug(f"Extracted entities: {entities}")
+            # Database search
             articles = await self._execute_semantic_search(
                 query_embedding,
                 start_dt,
                 end_dt,
                 topic,
+                [ent[0] for ent in entities]  # Just the entity texts
             )
             if not articles:
                 logger.info("No articles found matching criteria")
                 return {"message": "No articles found", "articles": []}
+            # Summary generation
+            summary_data = self._generate_summary(articles)
             return {
                 "summary": summary_data["summary"],
             logger.error(f"Semantic search failed: {str(e)}")
             raise
+    def _generate_summary(self, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Generate summary from articles with fallback handling"""
         try:
             # Extract and process content
             sentences = []
             for article in articles:
                 if content := article.get("content"):
+                    sentences.extend(self.nlp_model.tokenize_sentences(content))
             if not sentences:
                 logger.warning("No sentences available for summarization")
                     "key_sentences": []
                 }
+            # Generate summary
             embeddings = self.embedding_model.encode(sentences)
             similarity_matrix = np.inner(embeddings, embeddings)
             centrality_scores = degree_centrality_scores(similarity_matrix, threshold=None)
+            # Get top 10 most central sentences
             top_indices = np.argsort(-centrality_scores)[:10]
             key_sentences = [sentences[idx].strip() for idx in top_indices]