correct pr for the added MongoDB support (#2)
Browse files- correct pr for the added MongoDB support (24a095843b6e4c319539b946a8ec62237e415af8)
Co-authored-by: Furkan Eris <[email protected]>
- README.md +24 -9
- app.py +190 -116
- requirements.txt +3 -1
README.md
CHANGED
|
@@ -20,7 +20,7 @@ A sophisticated system for detecting hallucinations in AI responses using a para
|
|
| 20 |
- **Paraphrase Generation**: Automatically generates semantically equivalent variations of user queries
|
| 21 |
- **Multi-Model Architecture**: Uses Mistral Large for responses and OpenAI's o3-mini as a judge
|
| 22 |
- **Real-time Progress Tracking**: Visual feedback during the analysis process
|
| 23 |
-
- **
|
| 24 |
- **Interactive Web Interface**: Clean, responsive Gradio interface with example queries
|
| 25 |
- **Detailed Analysis**: Provides confidence scores, reasoning, and specific conflicting facts
|
| 26 |
- **Statistics Dashboard**: Real-time tracking of hallucination detection statistics
|
|
@@ -41,11 +41,23 @@ A sophisticated system for detecting hallucinations in AI responses using a para
|
|
| 41 |
1. Create a new Space on Hugging Face
|
| 42 |
2. Select "Gradio" as the SDK
|
| 43 |
3. Add your repository
|
| 44 |
-
4. Set
|
|
|
|
| 45 |
- `HF_MISTRAL_API_KEY`
|
| 46 |
- `HF_OPENAI_API_KEY`
|
|
|
|
| 47 |
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
## Usage
|
| 51 |
|
|
@@ -71,16 +83,19 @@ The application uses Hugging Face Spaces' persistent storage (`/data` directory)
|
|
| 71 |
- Provides confidence scores and reasoning
|
| 72 |
|
| 73 |
3. **Feedback Collection**:
|
| 74 |
-
- User feedback is stored in
|
| 75 |
-
-
|
| 76 |
- Statistics are updated in real-time
|
|
|
|
| 77 |
|
| 78 |
## Data Persistence
|
| 79 |
|
| 80 |
-
The application uses
|
| 81 |
-
-
|
| 82 |
-
-
|
| 83 |
-
- No
|
|
|
|
|
|
|
| 84 |
|
| 85 |
## Contributing
|
| 86 |
|
|
|
|
| 20 |
- **Paraphrase Generation**: Automatically generates semantically equivalent variations of user queries
|
| 21 |
- **Multi-Model Architecture**: Uses Mistral Large for responses and OpenAI's o3-mini as a judge
|
| 22 |
- **Real-time Progress Tracking**: Visual feedback during the analysis process
|
| 23 |
+
- **Permanent Cloud Storage**: User feedback and results are stored in MongoDB Atlas, so they persist across Space restarts
|
| 24 |
- **Interactive Web Interface**: Clean, responsive Gradio interface with example queries
|
| 25 |
- **Detailed Analysis**: Provides confidence scores, reasoning, and specific conflicting facts
|
| 26 |
- **Statistics Dashboard**: Real-time tracking of hallucination detection statistics
|
|
|
|
| 41 |
1. Create a new Space on Hugging Face
|
| 42 |
2. Select "Gradio" as the SDK
|
| 43 |
3. Add your repository
|
| 44 |
+
4. Set up a MongoDB Atlas database (see below)
|
| 45 |
+
5. Set the following secrets in your Space's settings:
|
| 46 |
- `HF_MISTRAL_API_KEY`
|
| 47 |
- `HF_OPENAI_API_KEY`
|
| 48 |
+
- `MONGODB_URI`
|
| 49 |
|
| 50 |
+
### MongoDB Atlas Setup
|
| 51 |
+
|
| 52 |
+
For permanent data storage that persists across HuggingFace Space restarts:
|
| 53 |
+
|
| 54 |
+
1. Create a free [MongoDB Atlas account](https://www.mongodb.com/cloud/atlas/register)
|
| 55 |
+
2. Create a new cluster (the free tier is sufficient)
|
| 56 |
+
3. In the "Database Access" menu, create a database user with read/write permissions
|
| 57 |
+
4. In the "Network Access" menu, add IP `0.0.0.0/0` to allow access from anywhere (required for HuggingFace Spaces)
|
| 58 |
+
5. In the "Databases" section, click "Connect" and choose "Connect your application"
|
| 59 |
+
6. Copy the connection string and replace `<password>` with your database user's password
|
| 60 |
+
7. Set this as your `MONGODB_URI` secret in HuggingFace Spaces settings
|
| 61 |
|
| 62 |
## Usage
|
| 63 |
|
|
|
|
| 83 |
- Provides confidence scores and reasoning
|
| 84 |
|
| 85 |
3. **Feedback Collection**:
|
| 86 |
+
- User feedback is stored in MongoDB Atlas
|
| 87 |
+
- Cloud-based persistent storage ensures feedback data survives Space restarts
|
| 88 |
- Statistics are updated in real-time
|
| 89 |
+
- Data can be exported for further analysis
|
| 90 |
|
| 91 |
## Data Persistence
|
| 92 |
|
| 93 |
+
The application uses MongoDB Atlas for data storage, providing several benefits:
|
| 94 |
+
- **Permanent Storage**: Data persists even when Hugging Face Spaces restart
|
| 95 |
+
- **Scalability**: MongoDB scales as your data grows
|
| 96 |
+
- **Cloud-based**: No reliance on Space-specific storage that can be lost
|
| 97 |
+
- **Query Capabilities**: Powerful query functionality for data analysis
|
| 98 |
+
- **Export Options**: Built-in methods to export data to CSV
|
| 99 |
|
| 100 |
## Contributing
|
| 101 |
|
app.py
CHANGED
|
@@ -14,7 +14,13 @@ import time
|
|
| 14 |
import concurrent.futures
|
| 15 |
from concurrent.futures import ThreadPoolExecutor
|
| 16 |
import threading
|
| 17 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# Configure logging
|
| 20 |
logging.basicConfig(
|
|
@@ -380,78 +386,44 @@ Your response should be a JSON with the following fields:
|
|
| 380 |
class HallucinationDetectorApp:
|
| 381 |
def __init__(self):
|
| 382 |
self.pas2 = None
|
| 383 |
-
# Use the default HF Spaces persistent storage location
|
| 384 |
-
self.data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
| 385 |
-
self.db_path = os.path.join(self.data_dir, "feedback.db")
|
| 386 |
logger.info("Initializing HallucinationDetectorApp")
|
| 387 |
self._initialize_database()
|
| 388 |
self.progress_callback = None
|
| 389 |
|
| 390 |
def _initialize_database(self):
|
| 391 |
-
"""Initialize
|
| 392 |
try:
|
| 393 |
-
#
|
| 394 |
-
os.
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
|
| 418 |
-
conn.commit()
|
| 419 |
-
conn.close()
|
| 420 |
-
logger.info(f"Database initialized successfully at {self.db_path}")
|
| 421 |
except Exception as e:
|
| 422 |
-
logger.error(f"Error initializing
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
self.
|
| 427 |
-
logger.warning(f"Using fallback database location: {self.db_path}")
|
| 428 |
-
|
| 429 |
-
# Try creating database in fallback location
|
| 430 |
-
try:
|
| 431 |
-
conn = sqlite3.connect(self.db_path)
|
| 432 |
-
cursor = conn.cursor()
|
| 433 |
-
cursor.execute('''
|
| 434 |
-
CREATE TABLE IF NOT EXISTS feedback (
|
| 435 |
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 436 |
-
timestamp TEXT,
|
| 437 |
-
original_query TEXT,
|
| 438 |
-
original_response TEXT,
|
| 439 |
-
paraphrased_queries TEXT,
|
| 440 |
-
paraphrased_responses TEXT,
|
| 441 |
-
hallucination_detected INTEGER,
|
| 442 |
-
confidence_score REAL,
|
| 443 |
-
conflicting_facts TEXT,
|
| 444 |
-
reasoning TEXT,
|
| 445 |
-
summary TEXT,
|
| 446 |
-
user_feedback TEXT
|
| 447 |
-
)
|
| 448 |
-
''')
|
| 449 |
-
conn.commit()
|
| 450 |
-
conn.close()
|
| 451 |
-
logger.info(f"Database initialized in fallback location")
|
| 452 |
-
except Exception as fallback_error:
|
| 453 |
-
logger.error(f"Critical error: Could not initialize database in fallback location: {str(fallback_error)}", exc_info=True)
|
| 454 |
-
raise
|
| 455 |
|
| 456 |
def set_progress_callback(self, callback):
|
| 457 |
"""Set the progress callback function"""
|
|
@@ -503,80 +475,182 @@ class HallucinationDetectorApp:
|
|
| 503 |
}
|
| 504 |
|
| 505 |
def save_feedback(self, results, feedback):
|
| 506 |
-
"""Save results and user feedback to
|
| 507 |
try:
|
| 508 |
logger.info("Saving user feedback: %s", feedback)
|
| 509 |
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
results.get('
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
results.get('
|
| 522 |
-
|
| 523 |
-
results.get('
|
| 524 |
-
results.get('
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
# Insert data
|
| 529 |
-
cursor.execute('''
|
| 530 |
-
INSERT INTO feedback (
|
| 531 |
-
timestamp, original_query, original_response,
|
| 532 |
-
paraphrased_queries, paraphrased_responses,
|
| 533 |
-
hallucination_detected, confidence_score,
|
| 534 |
-
conflicting_facts, reasoning, summary, user_feedback
|
| 535 |
-
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
| 536 |
-
''', data)
|
| 537 |
|
| 538 |
-
|
| 539 |
-
|
| 540 |
|
| 541 |
-
logger.info("Feedback saved successfully to
|
| 542 |
return "Feedback saved successfully!"
|
| 543 |
except Exception as e:
|
| 544 |
logger.error("Error saving feedback: %s", str(e), exc_info=True)
|
| 545 |
return f"Error saving feedback: {str(e)}"
|
| 546 |
|
| 547 |
def get_feedback_stats(self):
|
| 548 |
-
"""Get statistics about collected feedback"""
|
| 549 |
try:
|
| 550 |
-
|
| 551 |
-
|
|
|
|
| 552 |
|
| 553 |
# Get total feedback count
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
detection_stats =
|
|
|
|
| 564 |
|
| 565 |
# Get average confidence score
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 570 |
|
| 571 |
return {
|
| 572 |
"total_feedback": total_count,
|
| 573 |
-
"hallucinations_detected": detection_stats.get(
|
| 574 |
-
"no_hallucinations": detection_stats.get(
|
| 575 |
"average_confidence": round(avg_confidence, 2)
|
| 576 |
}
|
| 577 |
except Exception as e:
|
| 578 |
logger.error("Error getting feedback stats: %s", str(e), exc_info=True)
|
| 579 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 580 |
|
| 581 |
|
| 582 |
# Progress tracking for UI updates
|
|
@@ -1480,4 +1554,4 @@ if __name__ == "__main__":
|
|
| 1480 |
|
| 1481 |
# Uncomment this line to run the test function instead of the main interface
|
| 1482 |
# if __name__ == "__main__":
|
| 1483 |
-
# test_progress()
|
|
|
|
| 14 |
import concurrent.futures
|
| 15 |
from concurrent.futures import ThreadPoolExecutor
|
| 16 |
import threading
|
| 17 |
+
import pymongo
|
| 18 |
+
from pymongo import MongoClient
|
| 19 |
+
from bson.objectid import ObjectId
|
| 20 |
+
from dotenv import load_dotenv
|
| 21 |
+
|
| 22 |
+
# Load environment variables
|
| 23 |
+
load_dotenv()
|
| 24 |
|
| 25 |
# Configure logging
|
| 26 |
logging.basicConfig(
|
|
|
|
| 386 |
class HallucinationDetectorApp:
|
| 387 |
def __init__(self):
    """Set up the app in its idle state and initialize feedback storage.

    ``pas2`` and ``progress_callback`` both start out as ``None``; the
    database is initialized via ``_initialize_database()``.
    """
    logger.info("Initializing HallucinationDetectorApp")

    # Nothing is loaded yet; presumably assigned later by other methods
    # of this class (not visible here).
    self.pas2 = None

    # Feedback storage is prepared before the callback slot is reset.
    self._initialize_database()

    self.progress_callback = None
|
| 392 |
|
| 393 |
def _initialize_database(self):
    """Connect to MongoDB and prepare the feedback collection.

    The connection string is read from the ``MONGODB_URI`` environment
    variable. On success ``self.mongo_client``, ``self.db`` and
    ``self.feedback_collection`` are set to the live client, the
    ``hallucination_detector`` database and its ``feedback`` collection.
    On any failure, including a missing URI, all three are left as
    ``None`` so callers can detect that persistence is unavailable.
    Exceptions are logged, never raised.
    """
    # Start from the "no database" state so every exit path leaves the
    # attributes defined.
    self.mongo_client = None
    self.db = None
    self.feedback_collection = None

    mongo_uri = os.environ.get("MONGODB_URI")
    if not mongo_uri:
        # Without a URI there is nothing to connect to; skip the attempt
        # entirely instead of dialing a placeholder that can only fail.
        logger.warning("MONGODB_URI not found in environment variables. Please set it in HuggingFace Spaces secrets.")
        logger.warning("Proceeding without database connection. Data will not be saved persistently.")
        return

    client = None
    try:
        # Bound server selection so an unreachable cluster fails fast
        # rather than blocking start-up for the driver's default timeout.
        client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)

        # Verify connectivity first; everything below needs a live server.
        client.admin.command('ping')

        database = client["hallucination_detector"]
        collection = database["feedback"]

        # Index on timestamp for faster time-ordered queries.
        collection.create_index("timestamp")

        self.mongo_client = client
        self.db = database
        self.feedback_collection = collection
        logger.info("MongoDB connection successful")
    except Exception as e:
        logger.error(f"Error initializing MongoDB: {str(e)}", exc_info=True)
        logger.warning("Proceeding without database connection. Data will not be saved persistently.")
        # Release the half-open client so it is not left behind unused.
        if client is not None:
            client.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
|
| 428 |
def set_progress_callback(self, callback):
|
| 429 |
"""Set the progress callback function"""
|
|
|
|
| 475 |
}
|
| 476 |
|
| 477 |
def save_feedback(self, results, feedback):
    """Save results and user feedback to MongoDB.

    Args:
        results: Mapping read with ``.get``; the keys ``original_query``,
            ``original_response``, ``paraphrased_queries``,
            ``paraphrased_responses``, ``hallucination_detected``,
            ``confidence_score``, ``conflicting_facts``, ``reasoning`` and
            ``summary`` are copied into the stored document, with empty
            defaults for any that are missing.
        feedback: Value stored under ``user_feedback``.

    Returns:
        A status message string: success, "connection not available", or
        an error description. Exceptions are logged, never raised.
    """
    # Imported locally so this method does not depend on the module's
    # import list for the ``timezone`` name.
    from datetime import timezone

    try:
        logger.info("Saving user feedback: %s", feedback)

        if self.feedback_collection is None:
            logger.error("MongoDB connection not available. Cannot save feedback.")
            return "Database connection not available. Feedback not saved."

        # Prepare document for MongoDB
        document = {
            # Timezone-aware UTC: a naive local datetime would be stored
            # by the driver as if it were already UTC.
            "timestamp": datetime.now(timezone.utc),
            "original_query": results.get('original_query', ''),
            "original_response": results.get('original_response', ''),
            "paraphrased_queries": results.get('paraphrased_queries', []),
            "paraphrased_responses": results.get('paraphrased_responses', []),
            "hallucination_detected": results.get('hallucination_detected', False),
            "confidence_score": results.get('confidence_score', 0.0),
            "conflicting_facts": results.get('conflicting_facts', []),
            "reasoning": results.get('reasoning', ''),
            "summary": results.get('summary', ''),
            "user_feedback": feedback,
        }

        # Insert document into collection
        self.feedback_collection.insert_one(document)

        logger.info("Feedback saved successfully to MongoDB")
        return "Feedback saved successfully!"
    except Exception as e:
        logger.error("Error saving feedback: %s", str(e), exc_info=True)
        return f"Error saving feedback: {str(e)}"
|
| 509 |
|
| 510 |
def get_feedback_stats(self):
    """Get statistics about collected feedback from MongoDB.

    Returns:
        A dict with ``total_feedback`` (document count),
        ``hallucinations_detected`` / ``no_hallucinations`` (counts of
        documents whose ``hallucination_detected`` is ``True`` / ``False``)
        and ``average_confidence`` (mean ``confidence_score`` rounded to
        two decimals, ``0`` when no average is available). ``None`` when
        the collection is unavailable or a query fails.
    """
    try:
        if self.feedback_collection is None:
            logger.error("MongoDB connection not available. Cannot get feedback stats.")
            return None

        # Get total feedback count
        total_count = self.feedback_collection.count_documents({})

        # Count documents per hallucination_detected value
        hallucination_pipeline = [
            {"$group": {
                "_id": "$hallucination_detected",
                "count": {"$sum": 1},
            }}
        ]
        detection_stats = {
            doc["_id"]: doc["count"]
            for doc in self.feedback_collection.aggregate(hallucination_pipeline)
        }

        # Get average confidence score over the whole collection
        avg_pipeline = [
            {"$group": {
                "_id": None,
                "average": {"$avg": "$confidence_score"},
            }}
        ]
        avg_result = list(self.feedback_collection.aggregate(avg_pipeline))

        # The group can exist with a null average (no numeric
        # confidence_score in any document); treat that like "no data"
        # instead of letting round(None) raise.
        average = avg_result[0].get("average") if avg_result else None
        avg_confidence = average if average is not None else 0

        return {
            "total_feedback": total_count,
            "hallucinations_detected": detection_stats.get(True, 0),
            "no_hallucinations": detection_stats.get(False, 0),
            "average_confidence": round(avg_confidence, 2),
        }
    except Exception as e:
        logger.error("Error getting feedback stats: %s", str(e), exc_info=True)
        return None
|
| 549 |
+
|
| 550 |
+
def export_data_to_csv(self, filepath=None):
    """Export all feedback data to a CSV file for analysis.

    Args:
        filepath: Destination path. When falsy, a timestamped
            ``hallucination_data_<YYYYmmdd_HHMMSS>.csv`` next to this
            source file is used.

    Returns:
        The path written on success; otherwise a human-readable error
        message string (connection unavailable or export failure).
        Exceptions are logged, never raised.
    """
    try:
        if self.feedback_collection is None:
            logger.error("MongoDB connection not available. Cannot export data.")
            return "Database connection not available. Cannot export data."

        # Query all feedback data and materialize it
        records = list(self.feedback_collection.find({}))

        # Flatten values that do not fit a CSV cell as-is
        list_fields = ('paraphrased_queries', 'paraphrased_responses', 'conflicting_facts')
        for record in records:
            # Convert ObjectId to string
            record['_id'] = str(record['_id'])

            # Only format real datetimes; any other stored value is
            # exported unchanged rather than aborting the whole export.
            timestamp = record.get('timestamp')
            if isinstance(timestamp, datetime):
                record['timestamp'] = timestamp.strftime("%Y-%m-%d %H:%M:%S")

            # Convert lists to JSON strings for CSV storage
            for field in list_fields:
                if field in record:
                    record[field] = json.dumps(record[field])

        # Create DataFrame
        df = pd.DataFrame(records)

        # Define default filepath if not provided
        if not filepath:
            filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                    f"hallucination_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")

        # Export to CSV
        df.to_csv(filepath, index=False)
        logger.info(f"Data successfully exported to {filepath}")

        return filepath
    except Exception as e:
        logger.error(f"Error exporting data: {str(e)}", exc_info=True)
        return f"Error exporting data: {str(e)}"
|
| 597 |
+
|
| 598 |
+
def get_recent_queries(self, limit=10):
    """Get most recent queries for display in the UI.

    Args:
        limit: Value passed to the cursor's ``limit()``.

    Returns:
        A list of dicts, newest first by ``timestamp``, each with ``id``
        (stringified ``_id``), ``query``, ``hallucination_detected`` and
        ``timestamp`` (formatted ``YYYY-MM-DD HH:MM:SS`` when stored as a
        ``datetime``, otherwise passed through). An empty list when the
        collection is unavailable or the query fails.
    """
    try:
        if self.feedback_collection is None:
            logger.error("MongoDB connection not available. Cannot get recent queries.")
            return []

        # Fetch only the fields the listing needs, newest first
        cursor = self.feedback_collection.find(
            {},
            {"original_query": 1, "hallucination_detected": 1, "timestamp": 1}
        ).sort("timestamp", pymongo.DESCENDING).limit(limit)

        recent_queries = []
        for doc in cursor:
            # Fields are read with .get so one document lacking a field
            # does not discard the entire listing.
            timestamp = doc.get("timestamp")
            if isinstance(timestamp, datetime):
                timestamp = timestamp.strftime("%Y-%m-%d %H:%M:%S")

            recent_queries.append({
                "id": str(doc["_id"]),
                "query": doc.get("original_query", ""),
                "hallucination_detected": doc.get("hallucination_detected", False),
                "timestamp": timestamp,
            })

        return recent_queries
    except Exception as e:
        logger.error(f"Error getting recent queries: {str(e)}", exc_info=True)
        return []
|
| 625 |
+
|
| 626 |
+
def get_query_details(self, query_id):
    """Fetch one stored feedback document by its string ID.

    Args:
        query_id: Value handed to ``ObjectId`` to build the ``_id`` filter.

    Returns:
        The matching document with ``_id`` converted to ``str`` and a
        ``datetime`` ``timestamp`` formatted as ``YYYY-MM-DD HH:MM:SS``.
        ``None`` when storage is unavailable, nothing matches, or any
        error occurs (errors are logged, not raised).
    """
    try:
        collection = self.feedback_collection
        if collection is None:
            logger.error("MongoDB connection not available. Cannot get query details.")
            return None

        # Build the ObjectId from the string ID and look the document up
        record = collection.find_one({"_id": ObjectId(query_id)})
        if record is None:
            logger.warning(f"No query found with ID {query_id}")
            return None

        # Stringify the ObjectId so the result is JSON-serializable
        record["_id"] = str(record["_id"])

        # Format the timestamp only when it is an actual datetime
        stamp = record.get("timestamp")
        if isinstance(stamp, datetime):
            record["timestamp"] = stamp.strftime("%Y-%m-%d %H:%M:%S")

        return record
    except Exception as e:
        logger.error(f"Error getting query details: {str(e)}", exc_info=True)
        return None
|
| 654 |
|
| 655 |
|
| 656 |
# Progress tracking for UI updates
|
|
|
|
| 1554 |
|
| 1555 |
# Uncomment this line to run the test function instead of the main interface
|
| 1556 |
# if __name__ == "__main__":
|
| 1557 |
+
# test_progress()
|
requirements.txt
CHANGED
|
@@ -4,4 +4,6 @@ numpy
|
|
| 4 |
mistralai
|
| 5 |
openai
|
| 6 |
pydantic
|
| 7 |
-
python-dotenv
|
|
|
|
|
|
|
|
|
| 4 |
mistralai
|
| 5 |
openai
|
| 6 |
pydantic
|
| 7 |
+
python-dotenv
|
| 8 |
+
pymongo
|
| 9 |
+
dnspython
|