Spaces:
Running
on
CPU Upgrade
| from fastapi import APIRouter, HTTPException | |
| import random | |
| from datasets import load_dataset | |
| from huggingface_hub import HfApi, dataset_info | |
| import os | |
# Router for benchmark-related endpoints; mounted by the application elsewhere.
router = APIRouter(tags=["benchmark"])
def _collect_questions(questions: list, repo_id: str, config_name: str,
                       question_type: str, label: str, limit: int) -> None:
    """Best-effort: append up to `limit` questions from one dataset config.

    Args:
        questions: Accumulator list mutated in place.
        repo_id: Hugging Face dataset repo id.
        config_name: Dataset config to load (e.g. 'single_shot_questions').
        question_type: Value stored in each question's "type" field.
        label: Human-readable label used in log messages.
        limit: Maximum number of questions to append.

    Any load failure is swallowed and logged, leaving `questions` unchanged —
    callers treat an empty result as "config unavailable".
    """
    try:
        dataset = load_dataset(repo_id, config_name)
        if dataset and len(dataset['train']) > 0:
            # Skip index 0: the first question is deliberately avoided.
            start_idx = 1
            count = min(limit, max(0, len(dataset['train']) - start_idx))
            for i in range(count):
                idx = start_idx + i
                row = dataset['train'][idx]
                questions.append({
                    "id": str(idx),
                    "question": row.get("question", ""),
                    "answer": row.get("self_answer", "No answer available"),
                    "type": question_type
                })
            print(f"Loaded {len(questions)} {label} questions")
    except Exception as e:
        print(f"Error loading {label} questions: {str(e)}")


# NOTE(review): no @router.* decorator is attached here — confirm this handler
# is registered elsewhere (the decorator may have been lost in extraction).
async def get_benchmark_questions(session_id: str):
    """
    Get example questions from the generated benchmark.

    Args:
        session_id: Session ID for the benchmark

    Returns:
        Dictionary with sample questions from the dataset

    Raises:
        HTTPException: 404 when no questions load and no local session
            directory exists as a fallback.
    """
    try:
        # Dataset path on Hugging Face
        dataset_repo_id = f"yourbench/yourbench_{session_id}"

        response = {
            "success": False,
            "questions": [],
            "dataset_url": f"https://huggingface.co/datasets/{dataset_repo_id}"
        }

        # NOTE(review): load_dataset is blocking; inside an async handler it
        # stalls the event loop — consider run_in_executor. Kept as-is here.
        questions: list = []
        # Prefer single-shot questions; top up to 2 total with multi-hop ones.
        _collect_questions(questions, dataset_repo_id, 'single_shot_questions',
                           'single_shot', 'single-shot', 2)
        if len(questions) < 2:
            _collect_questions(questions, dataset_repo_id, 'multi_hop_questions',
                               'multi_hop', 'multi-hop', 2 - len(questions))

        # If we couldn't load any questions, the dataset might not exist;
        # a local session directory is accepted as proof the session is real.
        if not questions:
            session_dir = os.path.join("uploaded_files", session_id)
            if not os.path.exists(session_dir):
                raise HTTPException(status_code=404, detail="Dataset not found")

        response["success"] = len(questions) > 0
        response["questions"] = questions
        return response
    except HTTPException:
        # Re-raise HTTP exceptions so FastAPI renders the proper status code
        raise
    except Exception as e:
        # Any other failure is reported as a soft error payload, not a 500
        return {
            "success": False,
            "error": f"Error retrieving benchmark questions: {str(e)}"
        }