Spaces:
Running
Running
“vinit5112”
committed on
Commit
·
65726e0
1
Parent(s):
5b65de2
changes
Browse files
- backend/backend_api.py +1 -0
- backend/rag.py +5 -3
- backend/vector_store.py +4 -3
backend/backend_api.py
CHANGED
@@ -50,6 +50,7 @@ async def lifespan(app: FastAPI):
|
|
50 |
|
51 |
collection_name = os.getenv("COLLECTION_NAME", "ca-documents")
|
52 |
rag_system = RAG(google_api_key, collection_name)
|
|
|
53 |
logger.info("RAG system initialized successfully")
|
54 |
|
55 |
except Exception as e:
|
|
|
50 |
|
51 |
collection_name = os.getenv("COLLECTION_NAME", "ca-documents")
|
52 |
rag_system = RAG(google_api_key, collection_name)
|
53 |
+
await rag_system.initialize()
|
54 |
logger.info("RAG system initialized successfully")
|
55 |
|
56 |
except Exception as e:
|
backend/rag.py
CHANGED
@@ -17,9 +17,6 @@ class RAG:
|
|
17 |
# Setup Vector Store (Qdrant configuration is handled via environment variables)
|
18 |
self.vector_store = VectorStore()
|
19 |
|
20 |
-
# Verify vector store is properly initialized
|
21 |
-
asyncio.run(self.vector_store.verify_collection_health())
|
22 |
-
|
23 |
# Setup Text Splitter
|
24 |
self.text_splitter = RecursiveCharacterTextSplitter(
|
25 |
chunk_size=1000,
|
@@ -28,6 +25,11 @@ class RAG:
|
|
28 |
separators=["\n\n", "\n", ". ", " ", ""]
|
29 |
)
|
30 |
|
|
|
|
|
|
|
|
|
|
|
31 |
def process_pdf(self, file_path: str) -> List[str]:
|
32 |
"""Extract text from PDF and split into chunks using RecursiveTextSplitter"""
|
33 |
full_text = ""
|
|
|
17 |
# Setup Vector Store (Qdrant configuration is handled via environment variables)
|
18 |
self.vector_store = VectorStore()
|
19 |
|
|
|
|
|
|
|
20 |
# Setup Text Splitter
|
21 |
self.text_splitter = RecursiveCharacterTextSplitter(
|
22 |
chunk_size=1000,
|
|
|
25 |
separators=["\n\n", "\n", ". ", " ", ""]
|
26 |
)
|
27 |
|
28 |
+
async def initialize(self):
|
29 |
+
"""Asynchronous initialization to be called after object creation."""
|
30 |
+
await self.vector_store.initialize()
|
31 |
+
await self.vector_store.verify_collection_health()
|
32 |
+
|
33 |
def process_pdf(self, file_path: str) -> List[str]:
|
34 |
"""Extract text from PDF and split into chunks using RecursiveTextSplitter"""
|
35 |
full_text = ""
|
backend/vector_store.py
CHANGED
@@ -36,10 +36,11 @@ class VectorStore:
|
|
36 |
|
37 |
# Initialize embedding model with offline support
|
38 |
self.embedding_model = self._initialize_embedding_model()
|
39 |
-
|
40 |
-
# Create collection with proper indices
|
41 |
-
asyncio.run(self._ensure_collection_exists())
|
42 |
|
|
|
|
|
|
|
|
|
43 |
def _initialize_embedding_model(self):
|
44 |
"""Initialize the embedding model from a local directory"""
|
45 |
try:
|
|
|
36 |
|
37 |
# Initialize embedding model with offline support
|
38 |
self.embedding_model = self._initialize_embedding_model()
|
|
|
|
|
|
|
39 |
|
40 |
+
async def initialize(self):
|
41 |
+
"""Asynchronous initialization to be called after object creation."""
|
42 |
+
await self._ensure_collection_exists()
|
43 |
+
|
44 |
def _initialize_embedding_model(self):
|
45 |
"""Initialize the embedding model from a local directory"""
|
46 |
try:
|