# app.py
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool
from transformers import AutoTokenizer
import os
import gradio as gr
import requests
import pandas as pd
import traceback
import torch
# Import optional tool dependencies
try:
    from duckduckgo_search import DDGS
except ImportError:
    print("Warning: duckduckgo_search not installed. Web search will be limited.")
    DDGS = None

try:
    from sympy import sympify
    from sympy.core.sympify import SympifyError
except ImportError:
    print("Warning: sympy not installed. Math calculator will be limited.")
    sympify = None
    SympifyError = Exception
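# Note: for the optional tools above to be fully functional on the Space, duckduckgo_search and
# sympy must be installed (typically via requirements.txt). A fuller dependency sketch, inferred
# from the imports in this file rather than taken from the repo, is listed at the end of the file.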
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Advanced Agent Definition ---
class SmartAgent:
    def __init__(self):
        print("Initializing Local LLM Agent...")

        # Check available memory and CUDA
        if torch.cuda.is_available():
            print(f"CUDA available. GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
        else:
            print("CUDA not available, using CPU")

        # Use a smaller, more efficient model for Hugging Face Spaces
        model_options = [
            "microsoft/DialoGPT-medium",    # Much smaller, works well for chat
            "google/flan-t5-base",          # Good for reasoning tasks (seq2seq; may not load with HuggingFaceLLM's default causal-LM loader)
            "HuggingFaceH4/zephyr-7b-beta"  # Original (may fail in limited memory)
        ]
        model_name = model_options[1]  # Start with flan-t5-base
        print(f"Attempting to load model: {model_name}")
        try:
            # Initialize with memory-efficient settings
            self.llm = HuggingFaceLLM(
                model_name=model_name,
                tokenizer_name=model_name,
                context_window=512,  # Reduced context window
                max_new_tokens=128,  # Reduced max tokens
                generate_kwargs={
                    "temperature": 0.7,
                    "do_sample": True,
                    "pad_token_id": 0  # Prevent padding issues
                },
                device_map="auto",
                # Memory optimization parameters (8-bit loading only works if bitsandbytes is installed)
                model_kwargs={
                    "torch_dtype": torch.float16,  # Use half precision
                    "low_cpu_mem_usage": True,
                    "load_in_8bit": True,          # Enable 8-bit quantization if available
                }
            )
            print(f"Successfully loaded model: {model_name}")
        except Exception as e:
            print(f"Failed to load {model_name}: {e}")
            # Fall back to an even smaller model
            try:
                fallback_model = "microsoft/DialoGPT-small"
                print(f"Falling back to: {fallback_model}")
                self.llm = HuggingFaceLLM(
                    model_name=fallback_model,
                    tokenizer_name=fallback_model,
                    context_window=256,
                    max_new_tokens=64,
                    generate_kwargs={"temperature": 0.7, "do_sample": True},
                    device_map="cpu",  # Force CPU to avoid memory issues
                    model_kwargs={"low_cpu_mem_usage": True}
                )
                print(f"Successfully loaded fallback model: {fallback_model}")
            except Exception as e2:
                print(f"All model loading attempts failed: {e2}")
                raise RuntimeError("Unable to load any language model") from e2
        # Define tools with real implementations
        self.tools = [
            FunctionTool.from_defaults(
                fn=self.web_search,
                name="web_search",
                description="Searches the web for current information using DuckDuckGo when questions require up-to-date knowledge"
            ),
            FunctionTool.from_defaults(
                fn=self.math_calculator,
                name="math_calculator",
                description="Performs mathematical calculations and symbolic math using SymPy when questions involve numbers or equations"
            )
        ]
        # Create ReAct agent with tools
        try:
            self.agent = ReActAgent.from_tools(
                tools=self.tools,
                llm=self.llm,
                verbose=True,
                max_iterations=3  # Limit iterations to prevent infinite loops
            )
            print("Local LLM Agent initialized successfully.")
        except Exception as e:
            print(f"Error creating ReAct agent: {e}")
            # Create a simple fallback: no agent, call tools directly
            self.agent = None
            print("Using fallback direct tool calling approach")
    def web_search(self, query: str) -> str:
        """Real web search using DuckDuckGo."""
        print(f"Web search triggered for: {query[:50]}...")
        if not DDGS:
            return "Web search unavailable - duckduckgo_search not installed"
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=3))
                if results:
                    formatted_results = []
                    for i, r in enumerate(results, 1):
                        title = r.get('title', 'No title')
                        body = r.get('body', 'No description')[:200]
                        url = r.get('href', '')
                        formatted_results.append(f"{i}. {title}\n{body}...\nSource: {url}")
                    return "\n\n".join(formatted_results)
                else:
                    return "No search results found for the query."
        except Exception as e:
            print(f"Web search error: {e}")
            return f"Error during web search: {str(e)}"
    def math_calculator(self, expression: str) -> str:
        """Safe math evaluation using SymPy."""
        print(f"Math calculation triggered for: {expression}")
        if not sympify:
            # Fallback to basic eval with safety checks
            try:
                # Only allow basic math operations
                allowed_chars = set('0123456789+-*/().^ ')
                if not all(c in allowed_chars for c in expression.replace(' ', '')):
                    return "Error: Only basic math operations are allowed"
                result = eval(expression.replace('^', '**'))
                return str(result)
            except Exception as e:
                return f"Error: Could not evaluate the mathematical expression - {str(e)}"
        try:
            # Use SymPy for safe evaluation
            result = sympify(expression).evalf()
            return str(result)
        except SympifyError as e:
            return f"Error: Could not parse the mathematical expression - {str(e)}"
        except Exception as e:
            return f"Error: Calculation failed - {str(e)}"
    def __call__(self, question: str) -> str:
        print(f"Processing question (first 50 chars): {question[:50]}...")
        try:
            if self.agent:
                response = self.agent.query(question)
                return str(response)
            else:
                # Fallback: direct tool usage based on question content
                question_lower = question.lower()
                if any(word in question_lower for word in ['calculate', 'math', 'equation', '+', '-', '*', '/', '=']):
                    # Try the math calculator on the numeric-looking parts of the question
                    math_terms = []
                    for word in question.split():
                        if any(char in word for char in '0123456789+-*/()'):
                            math_terms.append(word)
                    if math_terms:
                        expression = ' '.join(math_terms)
                        return self.math_calculator(expression)
                if any(word in question_lower for word in ['search', 'find', 'what is', 'current', 'latest', 'news']):
                    # Try web search
                    return self.web_search(question)
                # Default response
                return f"I understand you're asking: {question[:100]}... However, I'm having trouble processing this with the current model configuration. Please try rephrasing your question or breaking it into smaller parts."
        except Exception as e:
            print(f"Agent error: {str(e)}")
            print(f"Full traceback: {traceback.format_exc()}")
            return f"Error processing question: {str(e)}"
# --- Memory cleanup function ---
def cleanup_memory():
    """Clean up GPU memory."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print("GPU memory cleared")
# --- Submission Logic ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the agent on them, submits all answers,
    and displays the results.
    """
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Clean memory before starting
    cleanup_memory()

    # Instantiate Agent
    try:
        agent = SmartAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        print(f"Full traceback: {traceback.format_exc()}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code URL: {agent_code}")

    # Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        return f"Error decoding server response for questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None
    # Run Agent on all questions
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")

    for i, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        print(f"Processing question {i}/{len(questions_data)}: {task_id}")
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
            })
            print(f"Completed question {i}: {task_id}")

            # Clean memory every 5 questions
            if i % 5 == 0:
                cleanup_memory()
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            error_answer = f"AGENT ERROR: {str(e)}"
            answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                "Submitted Answer": error_answer
            })

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
    # Prepare submission
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload
    }
    status_update = f"Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # Submit answers
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful!")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
# --- Gradio UI ---
with gr.Blocks(title="Local LLM Agent Evaluation") as demo:
    gr.Markdown("# Local LLM Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Log in to your Hugging Face account using the button below
        2. Click 'Run Evaluation & Submit All Answers'
        3. Wait for the local LLM to process all questions (using a memory-optimized smaller model)
        4. View your results and submission status

        **Features:**

        - Real web search using DuckDuckGo
        - Advanced math calculations with SymPy
        - Memory-optimized language model with fallback options
        - Error handling and recovery mechanisms
        """
    )

    with gr.Row():
        gr.LoginButton()

    with gr.Row():
        run_button = gr.Button(
            "Run Evaluation & Submit All Answers",
            variant="primary",
            size="lg"
        )

    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=8,
        interactive=False,
        placeholder="Click the button above to start the evaluation..."
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
        interactive=False
    )

    # Wire up the button (Gradio injects the OAuthProfile argument for the logged-in user)
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
if __name__ == "__main__":
    print("\n" + "=" * 60)
    print("Application Startup at", pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 60)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"SPACE_HOST found: {space_host_startup}")
        print(f"  Runtime URL should be: https://{space_host_startup}")
    else:
        print("SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"SPACE_ID found: {space_id_startup}")
        print(f"  Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"  Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("SPACE_ID environment variable not found (running locally?).")

    print("-" * 60)
    print("Launching Gradio Interface for Local LLM Agent Evaluation...")

    # Launch without share=True for Hugging Face Spaces
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )
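
# Dependency sketch for this Space, inferred from the imports in this file; package names and the
# need for the gradio OAuth extra are assumptions, not taken from the repo's actual requirements.txt:
#
#   gradio[oauth]
#   requests
#   pandas
#   torch
#   transformers
#   llama-index
#   llama-index-llms-huggingface
#   duckduckgo-search
#   sympy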