import os

from datasets import load_dataset
from fastapi import APIRouter, HTTPException

router = APIRouter(tags=["benchmark"])
@router.get("/benchmark-questions/{session_id}")
async def get_benchmark_questions(session_id: str):
    """
    Get example questions from the generated benchmark.

    Args:
        session_id: Session ID for the benchmark

    Returns:
        Dictionary with sample questions from the dataset
    """
    try:
        # Dataset path on Hugging Face
        dataset_repo_id = f"yourbench/yourbench_{session_id}"

        # Initialize the response
        response = {
            "success": False,
            "questions": [],
            "dataset_url": f"https://huggingface.co/datasets/{dataset_repo_id}"
        }

        # Try to load the dataset
        questions = []
        try:
            # Try to load the single-shot questions directly via the config name
            single_dataset = load_dataset(dataset_repo_id, 'single_shot_questions')

            if single_dataset and len(single_dataset['train']) > 0:
                # Take up to 5 questions starting at index 1 (skipping the first question)
                start_idx = 1
                max_questions = min(5, max(0, len(single_dataset['train']) - start_idx))

                for i in range(max_questions):
                    idx = start_idx + i
                    questions.append({
                        "id": str(idx),
                        "question": single_dataset['train'][idx].get("question", ""),
                        "answer": single_dataset['train'][idx].get("self_answer", "No answer available"),
                        "type": "single_shot"
                    })

                print(f"Loaded {len(questions)} single-shot questions")
        except Exception as e:
            print(f"Error loading single-shot questions: {str(e)}")
        # try:
        #     # Try to load multi-hop questions if more are needed
        #     if len(questions) < 2:
        #         multi_dataset = load_dataset(dataset_repo_id, 'multi_hop_questions')
        #         if multi_dataset and len(multi_dataset['train']) > 0:
        #             # Take multi-hop questions to top up, also skipping the first one
        #             start_idx = 1
        #             remaining = 2 - len(questions)
        #             max_questions = min(remaining, max(0, len(multi_dataset['train']) - start_idx))
        #             for i in range(max_questions):
        #                 idx = start_idx + i
        #                 questions.append({
        #                     "id": str(idx),
        #                     "question": multi_dataset['train'][idx].get("question", ""),
        #                     "answer": multi_dataset['train'][idx].get("self_answer", "No answer available"),
        #                     "type": "multi_hop"
        #                 })
        #             print(f"Loaded {len(questions)} multi-hop questions")
        # except Exception as e:
        #     print(f"Error loading multi-hop questions: {str(e)}")
        # If we couldn't load any questions, the dataset might not exist
        if len(questions) == 0:
            # Check if we have a directory for this session locally as a fallback
            session_dir = os.path.join("uploaded_files", session_id)
            if not os.path.exists(session_dir):
                raise HTTPException(status_code=404, detail="Dataset not found")

        # Update the response
        response["success"] = len(questions) > 0
        response["questions"] = questions

        return response
    except HTTPException:
        # Re-raise HTTP exceptions
        raise
    except Exception as e:
        return {
            "success": False,
            "error": f"Error retrieving benchmark questions: {str(e)}"
        }
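

# ---------------------------------------------------------------------------
# Usage sketch (illustrative addition, not part of the original route): one
# way to mount this router and exercise the endpoint locally. FastAPI's
# TestClient keeps the example self-contained; the session id "demo-session"
# is an assumption and will return a 404 unless a matching Hugging Face
# dataset or a local uploaded_files/<session_id> directory exists.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from fastapi import FastAPI
    from fastapi.testclient import TestClient

    app = FastAPI()
    app.include_router(router)

    client = TestClient(app)

    # Call the route as a client would; expect a 404 for an unknown session.
    resp = client.get("/benchmark-questions/demo-session")
    print(resp.status_code, resp.json())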