Spaces:
Running
Running
“vinit5112”
committed on
Commit
·
8146726
1
Parent(s):
f2611d0
post changes
Browse files
- backend/Qdrant.py +9 -9
- backend/rag.py +0 -7
- backend/vector_store.py +0 -64
backend/Qdrant.py
CHANGED
@@ -77,12 +77,12 @@ class QdrantManager:
|
|
77 |
logger.error(error_msg, exc_info=True)
|
78 |
raise ValueError(error_msg) from e
|
79 |
|
80 |
-
# Example usage
|
81 |
-
if __name__ == "__main__":
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
77 |
logger.error(error_msg, exc_info=True)
|
78 |
raise ValueError(error_msg) from e
|
79 |
|
80 |
+
# # Example usage
|
81 |
+
# if __name__ == "__main__":
|
82 |
+
# try:
|
83 |
+
# qdrant_manager = QdrantManager()
|
84 |
+
# collection_name = "ca-documents"
|
85 |
+
# result = qdrant_manager.get_or_create_company_collection(collection_name)
|
86 |
+
# print(f"Collection name: {result}")
|
87 |
+
# except Exception as e:
|
88 |
+
# print(f"Error: {e}")
|
backend/rag.py
CHANGED
@@ -189,10 +189,3 @@ if __name__ == "__main__":
|
|
189 |
google_api_key="your_google_api_key",
|
190 |
collection_name="ca-documents"
|
191 |
)
|
192 |
-
|
193 |
-
# Upload documents
|
194 |
-
# rag.upload_document("path/to/your/ca_document.pdf")
|
195 |
-
|
196 |
-
# Ask questions
|
197 |
-
# answer = rag.ask_question("What is depreciation?")
|
198 |
-
# print(answer)
|
|
|
189 |
google_api_key="your_google_api_key",
|
190 |
collection_name="ca-documents"
|
191 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/vector_store.py
CHANGED
@@ -233,70 +233,6 @@ class VectorStore:
|
|
233 |
print(f"Error searching: {e}")
|
234 |
return []
|
235 |
|
236 |
-
def search_by_document_id(self, document_id: str) -> Dict:
|
237 |
-
"""Search for a specific document by its ID using the indexed field"""
|
238 |
-
try:
|
239 |
-
# Use scroll to find document by document_id
|
240 |
-
results = self.client.scroll(
|
241 |
-
collection_name=self.collection_name,
|
242 |
-
scroll_filter=models.Filter(
|
243 |
-
must=[
|
244 |
-
models.FieldCondition(
|
245 |
-
key="document_id",
|
246 |
-
match=models.MatchValue(value=document_id)
|
247 |
-
)
|
248 |
-
]
|
249 |
-
),
|
250 |
-
limit=1
|
251 |
-
)
|
252 |
-
|
253 |
-
if results[0]: # results is a tuple (points, next_page_offset)
|
254 |
-
hit = results[0][0] # Get first point
|
255 |
-
return {
|
256 |
-
"text": hit.payload["content"], # Use content field
|
257 |
-
"document_id": hit.payload.get("document_id"),
|
258 |
-
# Include any additional metadata fields
|
259 |
-
**{k: v for k, v in hit.payload.items() if k not in ["content", "document_id"]}
|
260 |
-
}
|
261 |
-
else:
|
262 |
-
return None
|
263 |
-
|
264 |
-
except Exception as e:
|
265 |
-
print(f"Error searching by document ID: {e}")
|
266 |
-
return None
|
267 |
-
|
268 |
-
def search_by_content(self, content_query: str, limit: int = 5) -> List[Dict]:
|
269 |
-
"""Search for documents by content using the TEXT index"""
|
270 |
-
try:
|
271 |
-
# Use scroll with text search filter
|
272 |
-
results = self.client.scroll(
|
273 |
-
collection_name=self.collection_name,
|
274 |
-
scroll_filter=models.Filter(
|
275 |
-
must=[
|
276 |
-
models.FieldCondition(
|
277 |
-
key="content",
|
278 |
-
match=models.MatchText(text=content_query)
|
279 |
-
)
|
280 |
-
]
|
281 |
-
),
|
282 |
-
limit=limit
|
283 |
-
)
|
284 |
-
|
285 |
-
# Return results
|
286 |
-
return [
|
287 |
-
{
|
288 |
-
"text": hit.payload["content"], # Use content field
|
289 |
-
"document_id": hit.payload.get("document_id"),
|
290 |
-
# Include any additional metadata fields
|
291 |
-
**{k: v for k, v in hit.payload.items() if k not in ["content", "document_id"]}
|
292 |
-
}
|
293 |
-
for hit in results[0] # results[0] contains the points
|
294 |
-
]
|
295 |
-
|
296 |
-
except Exception as e:
|
297 |
-
print(f"Error searching by content: {e}")
|
298 |
-
return []
|
299 |
-
|
300 |
def get_collection_info(self) -> Dict:
|
301 |
"""Get information about the collection"""
|
302 |
try:
|
|
|
233 |
print(f"Error searching: {e}")
|
234 |
return []
|
235 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
def get_collection_info(self) -> Dict:
|
237 |
"""Get information about the collection"""
|
238 |
try:
|