"""Smoke-test a set of Hugging Face inference models/providers with a timeout."""

import signal
import time

from huggingface_hub import InferenceClient

# Configuration - models and their serving providers
MODELS = [
    ("Qwen/Qwen2.5-72B-Instruct", "sambanova"),
    ("meta-llama/Llama-3.3-70B-Instruct", "sambanova"),
    ("deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "sambanova"),
    ("Qwen/QwQ-32B", "novita"),
    # ("mistralai/Mistral-Small-24B-Instruct-2501", "novita")
]
QUESTION = "What is the capital of France?"
TIMEOUT = 10  # seconds


class TimeoutException(Exception):
    """Raised by the SIGALRM handler when a request exceeds TIMEOUT."""


def timeout_handler(signum, frame):
    """SIGALRM handler: abort the in-flight request by raising TimeoutException."""
    raise TimeoutException("Timeout")


def test_model(model, provider):
    """Query one model through its provider and report the outcome.

    Prints a one-line status plus either the (truncated) answer or the error,
    and returns a tuple ``(success: bool, execution_time: float, result: str)``.

    NOTE(review): signal.alarm/SIGALRM is Unix-only and only works on the main
    thread — this harness will not run on Windows or from worker threads.
    """
    client = InferenceClient(provider=provider)

    # Arm the watchdog: if the request hangs, SIGALRM fires after TIMEOUT s.
    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(TIMEOUT)

    # monotonic() is the right clock for measuring elapsed time: unlike
    # time.time() it is immune to wall-clock adjustments (NTP, DST, ...).
    start_time = time.monotonic()
    try:
        response = client.chat_completion(
            model=model,
            messages=[{"role": "user", "content": QUESTION}],
        )
        result = response.choices[0].message.content
        success = True
    except TimeoutException:
        result = f"TIMEOUT ({TIMEOUT}s)"
        success = False
    except Exception as e:  # best-effort harness: record the error, keep going
        result = str(e)
        success = False
    finally:
        # Disarm the alarm so it cannot fire after the request has finished.
        signal.alarm(0)

    execution_time = time.monotonic() - start_time
    status = "✅" if success else "❌"
    print(f"{status} {model} ({provider}) - Temps: {execution_time:.2f}s")
    if success:
        # Truncate long answers to keep the report readable.
        preview = f"{result[:80]}..." if len(result) > 80 else result
        print(f" Réponse: {preview}")
    else:
        print(f" Erreur: {result}")
    return success, execution_time, result


def main():
    """Run the smoke test against every configured model and print a summary."""
    print(f"\nTest de {len(MODELS)} modèles avec leurs providers spécifiques")
    print(f"Question: {QUESTION}")
    print(f"Timeout: {TIMEOUT}s\n")

    results = []
    for model, provider in MODELS:
        # The answer text is only printed inside test_model; ignore it here.
        success, time_taken, _ = test_model(model, provider)
        results.append({
            "model": model,
            "provider": provider,
            "success": success,
            "time": time_taken,
        })

    print("\n=== RÉSUMÉ ===")
    for result in results:
        status = "✅" if result["success"] else "❌"
        print(f"{status} {result['model']} ({result['provider']}): {result['time']:.2f}s")


if __name__ == "__main__":
    main()