# QA engine: retrieves relevant document chunks via vector search and
# generates an answer with an LLM.
import logging

from utils.ingestion import DocumentProcessor
from utils.llm import LLMProcessor
class QAEngine:
    """Answers questions about ingested documents via semantic search + an LLM.

    Wires together the document processor's vector store (Chroma collection
    ``document_chunks``) and the LLM processor used for embedding, context
    formatting, and answer generation.
    """

    def __init__(self):
        self.processor = DocumentProcessor()
        self.llm_processor = LLMProcessor()
        # Reuse the Chroma client owned by the document processor so querying
        # hits the same collection that ingestion wrote to.
        self.collection = self.processor.client.get_or_create_collection(
            "document_chunks"
        )

    def query(self, question: str, k: int = 5) -> str:
        """Query the document using semantic search and generate an answer.

        Args:
            question: Natural-language question to answer.
            k: Number of top-matching chunks to retrieve (default 5).

        Returns:
            The answer string produced by the LLM.
        """
        # Embed the question with the same embedding model used at ingestion
        # time so the similarity search is meaningful.
        query_embedding = self.llm_processor.embed_model.embed_query(question)
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=k,
        )
        # Chroma nests results one level per query; we issued a single query,
        # hence the [0]. Walk documents/metadatas in lockstep instead of
        # indexing by position.
        chunks = [
            {
                "text": text,
                "headings": meta.get("headings", "[]"),
                "page": meta.get("page"),
                "content_type": meta.get("content_type"),
            }
            for text, meta in zip(results["documents"][0], results["metadatas"][0])
        ]
        print(f"\nRelevant chunks for query: '{question}'")
        print("=" * 80)
        context = self.llm_processor.format_context(chunks)
        print(context)
        return self.llm_processor.generate_answer(context, question)