# Spaces: Runtime error
# app.py - Optimized for 16GB Memory | |
import ast
import gc
import operator
import os
import re
import traceback

import gradio as gr
import pandas as pd
import requests
import torch
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import AutoTokenizer

# Import real tool dependencies
try:
    from duckduckgo_search import DDGS
except ImportError:
    print("Warning: duckduckgo_search not installed. Web search will be limited.")
    DDGS = None

try:
    from sympy import sympify, solve, simplify, N
    from sympy.core.sympify import SympifyError
except ImportError:
    print("Warning: sympy not installed. Math calculator will be limited.")
    sympify = None
    SympifyError = Exception
# --- Constants ---
# Base URL of the scoring service; "/questions" and "/submit" are appended
# to it by run_and_submit_all.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Advanced Agent Definition --- | |
class SmartAgent:
    """Question-answering agent built on a local Hugging Face model.

    The agent wraps a ``HuggingFaceLLM`` in a LlamaIndex ``ReActAgent`` that
    can call two tools: ``web_search`` (DuckDuckGo) and ``math_calculator``
    (SymPy, with a restricted arithmetic fallback). When the ReAct agent is
    unavailable, raises, or returns a low-quality answer, ``__call__`` falls
    back to keyword-based routing straight to the tools.
    """

    # Substrings that suggest the question needs a web lookup.
    _SEARCH_INDICATORS = (
        'who is', 'what is', 'when did', 'where is', 'current', 'latest', 'recent',
        'today', 'news', 'winner', 'recipient', 'nationality', 'born in', 'died',
        'malko', 'competition', 'award', 'century', 'president', 'capital of',
        'population of', 'founded', 'established', 'discovery', 'invented',
    )

    # Substrings that suggest the question needs a calculation.
    _MATH_INDICATORS = (
        'calculate', 'compute', 'solve', 'equation', 'sum', 'total', 'average',
        'percentage', 'multiply', 'divide', 'add', 'subtract', '+', '-', '*', '/',
        '=', 'x=', 'y=', 'find x', 'find y',
    )

    # Words dropped when turning a question into a search query.
    _SKIP_WORDS = frozenset({
        'what', 'when', 'where', 'who', 'how', 'is', 'the', 'a', 'an', 'and',
        'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
    })

    # Markers of an unhelpful agent response.
    _BAD_RESPONSE_MARKERS = ('error', 'sorry', 'cannot', "don't know")

    # Operators accepted by the SymPy-free arithmetic fallback.
    _BINARY_OPS = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    _UNARY_OPS = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }

    # Largest exponent magnitude accepted by the fallback evaluator, so a
    # crafted input such as "9**9**9" cannot stall the process.
    _MAX_EXPONENT = 1000

    def __init__(self):
        """Load the LLM (with a smaller fallback), register tools, build the agent.

        Raises:
            Exception: whatever ``HuggingFaceLLM`` raises if the fallback
                model also fails to load. A failure of the primary model or
                of ReAct agent construction is logged and tolerated.
        """
        print("Initializing Optimized LLM Agent for 16GB Memory...")

        # Half precision is only safe on GPU; on CPU use float32.
        if torch.cuda.is_available():
            total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
            print(f"CUDA available. GPU memory: {total_gb:.1f}GB")
            device_map = "auto"
            torch_dtype = torch.float16
        else:
            print("CUDA not available, using CPU")
            device_map = "cpu"
            torch_dtype = torch.float32

        model_options = [
            "microsoft/DialoGPT-medium",
            "google/flan-t5-large",      # Better reasoning capability
            "microsoft/DialoGPT-large",  # Good for conversation
        ]
        # NOTE(review): flan-t5 is an encoder-decoder model; confirm that
        # HuggingFaceLLM can load it. If it cannot, the fallback below is
        # what actually runs.
        model_name = model_options[1]
        print(f"Loading model: {model_name}")

        try:
            self.llm = HuggingFaceLLM(
                model_name=model_name,
                tokenizer_name=model_name,
                context_window=2048,
                max_new_tokens=512,
                generate_kwargs={
                    "temperature": 0.1,  # Very low temperature for accuracy
                    "do_sample": True,
                    "top_p": 0.95,
                    "repetition_penalty": 1.2,
                    "pad_token_id": 0,  # Explicit pad token
                },
                device_map=device_map,
                model_kwargs={
                    "torch_dtype": torch_dtype,
                    "low_cpu_mem_usage": True,
                    "trust_remote_code": True,
                },
                # The keyword is `system_prompt`; the former `system_message`
                # is not a HuggingFaceLLM parameter.
                system_prompt=(
                    "You are a precise AI assistant. When asked a question:\n"
                    "1. If it needs current information, use web_search tool\n"
                    "2. If it involves calculations, use math_calculator tool\n"
                    "3. Provide direct, accurate answers\n"
                    "4. Always be specific and factual"
                ),
            )
            print(f"Successfully loaded model: {model_name}")
        except Exception as e:
            print(f"Failed to load {model_name}: {e}")
            # Try smaller fallback; a failure here propagates to the caller.
            fallback_model = "microsoft/DialoGPT-medium"
            print(f"Falling back to: {fallback_model}")
            self.llm = HuggingFaceLLM(
                model_name=fallback_model,
                tokenizer_name=fallback_model,
                context_window=1024,
                max_new_tokens=256,
                generate_kwargs={
                    "temperature": 0.1,
                    "do_sample": True,
                    "top_p": 0.9,
                    "repetition_penalty": 1.1,
                },
                device_map=device_map,
                model_kwargs={
                    "torch_dtype": torch_dtype,
                    "low_cpu_mem_usage": True,
                },
            )
            print(f"Successfully loaded fallback model: {fallback_model}")

        self.tools = [
            FunctionTool.from_defaults(
                fn=self.web_search,
                name="web_search",
                description="Search the web for current information, facts, or recent events. Use when you need up-to-date information.",
            ),
            FunctionTool.from_defaults(
                fn=self.math_calculator,
                name="math_calculator",
                description="Perform mathematical calculations, solve equations, or evaluate mathematical expressions.",
            ),
        ]

        # A failure here is tolerated: __call__ routes directly to the tools
        # when self.agent is None.
        try:
            self.agent = ReActAgent.from_tools(
                tools=self.tools,
                llm=self.llm,
                verbose=True,
                max_iterations=5,
                max_function_calls=10,
            )
            print("ReAct Agent initialized successfully.")
        except Exception as e:
            print(f"Error creating ReAct agent: {e}")
            self.agent = None

    def web_search(self, query: str) -> str:
        """Search DuckDuckGo and return the top results as numbered lines.

        Args:
            query: Search terms.

        Returns:
            A summary of up to five results (title plus the first 200
            characters of the body), or a message describing why no results
            are available. Never raises; errors are returned as text so the
            LLM can read them.
        """
        print(f"🔍 Web search: {query}")
        if not DDGS:
            return "Web search unavailable - duckduckgo_search not installed"
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=8, region='wt-wt'))
            if not results:
                return f"No results found for '{query}'. Try different keywords."

            formatted_results = []
            for i, r in enumerate(results[:5], 1):  # Top 5 results
                # `or` also covers keys that are present but None.
                title = r.get('title') or 'No title'
                body = r.get('body') or 'No description'
                body = body.replace('\n', ' ').strip()[:200]
                formatted_results.append(f"{i}. {title}: {body}")

            print(f"✅ Found {len(results)} results")
            return f"Search results for '{query}':\n" + "\n".join(formatted_results)
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            print(f"❌ Web search error: {e}")
            return f"Search error for '{query}': {str(e)}"

    @staticmethod
    def _normalize_expression(expression: str) -> str:
        """Rewrite '^', '×' and '÷' as the Python operators '**', '*', '/'."""
        return expression.replace('^', '**').replace('×', '*').replace('÷', '/')

    @staticmethod
    def _safe_eval(expression: str):
        """Evaluate a plain arithmetic expression without using ``eval``.

        Only int/float literals, parentheses and the operators
        ``+ - * / // % **`` (plus unary ``+``/``-``) are accepted.

        Raises:
            ValueError: if the expression contains anything else, or an
                exponent larger than ``_MAX_EXPONENT``.
            SyntaxError: if the expression is not valid Python syntax.
            ZeroDivisionError: on division by zero.
        """
        def evaluate(node):
            if isinstance(node, ast.Constant) and type(node.value) in (int, float):
                return node.value
            if isinstance(node, ast.BinOp) and type(node.op) in SmartAgent._BINARY_OPS:
                left = evaluate(node.left)
                right = evaluate(node.right)
                if isinstance(node.op, ast.Pow) and abs(right) > SmartAgent._MAX_EXPONENT:
                    raise ValueError("exponent too large")
                return SmartAgent._BINARY_OPS[type(node.op)](left, right)
            if isinstance(node, ast.UnaryOp) and type(node.op) in SmartAgent._UNARY_OPS:
                return SmartAgent._UNARY_OPS[type(node.op)](evaluate(node.operand))
            raise ValueError(f"unsupported expression element: {type(node).__name__}")

        tree = ast.parse(expression.strip(), mode="eval")
        return evaluate(tree.body)

    def math_calculator(self, expression: str) -> str:
        """Evaluate an expression or solve a single equation.

        With SymPy installed, ``lhs = rhs`` inputs are solved and anything
        else is evaluated numerically. Without SymPy, only plain arithmetic
        is supported, through a restricted evaluator.

        Args:
            expression: The expression or equation text.

        Returns:
            ``"Result: ..."`` or ``"Solution: ..."`` on success, otherwise an
            error message. Never raises.
        """
        print(f"🧮 Math calculation: {expression}")
        clean_expr = self._normalize_expression(expression)

        if not sympify:
            # Basic fallback. The input comes from the model/user, so it is
            # parsed with a whitelist evaluator rather than eval().
            try:
                return f"Result: {self._safe_eval(clean_expr)}"
            except (ValueError, SyntaxError, TypeError, ArithmeticError) as e:
                return f"Math error: {str(e)}"

        try:
            # Equations must be split before parsing: sympify() rejects a
            # bare '=' and would raise before the solve branch is reached.
            if '=' in clean_expr:
                parts = clean_expr.split('=')
                if len(parts) == 2 and all(part.strip() for part in parts):
                    lhs = sympify(parts[0])
                    rhs = sympify(parts[1])
                    solution = solve(lhs - rhs)  # solves lhs - rhs == 0
                    return f"Solution: {solution}"

            result = sympify(clean_expr)
            numerical_result = N(result, 10)  # 10 significant digits
            return f"Result: {numerical_result}"
        except Exception as e:
            # Tool boundary: SymPy can raise many exception types.
            print(f"❌ Math error: {e}")
            return f"Could not calculate '{expression}': {str(e)}"

    def __call__(self, question: str) -> str:
        """Answer a question, preferring the ReAct agent over direct tool use.

        Args:
            question: The question text.

        Returns:
            The agent's answer, or the result of ``_direct_approach`` when
            the agent is missing, raises, or its answer is shorter than 10
            characters or contains a failure marker.
        """
        print(f"🤔 Processing: {question[:100]}...")
        question_lower = question.lower()

        needs_search = any(ind in question_lower for ind in self._SEARCH_INDICATORS)
        needs_math = any(ind in question_lower for ind in self._MATH_INDICATORS)

        # Digits together with an operator character also count as math.
        has_numbers = bool(re.search(r'\d', question))
        if has_numbers and any(op in question for op in ('+', '-', '*', '/', '=', '^')):
            needs_math = True

        try:
            if not self.agent:
                return self._direct_approach(question, needs_search, needs_math)

            response_str = str(self.agent.query(question))
            lowered = response_str.lower()
            looks_poor = (
                len(response_str.strip()) < 10
                or any(bad in lowered for bad in self._BAD_RESPONSE_MARKERS)
            )
            if looks_poor:
                print("⚠️ Agent response seems poor, trying direct approach...")
                return self._direct_approach(question, needs_search, needs_math)
            return response_str
        except Exception as e:
            print(f"❌ Agent error: {str(e)}")
            return self._direct_approach(question, needs_search, needs_math)

    def _direct_approach(self, question: str, needs_search: bool, needs_math: bool) -> str:
        """Call the tools directly, bypassing the agent.

        Search takes priority over math. The search query is the first four
        words longer than two characters that are not in ``_SKIP_WORDS``.

        Args:
            question: The question text.
            needs_search: Whether to try a web search.
            needs_math: Whether to try extracting and evaluating an expression.

        Returns:
            The tool output with a short prefix, or a generic request for
            more information when neither route yields anything.
        """
        if needs_search:
            candidates = (
                word.lower().strip('.,!?;:')
                for word in question.replace('?', '').split()
            )
            important_words = [
                word for word in candidates
                if len(word) > 2 and word not in self._SKIP_WORDS
            ]
            search_query = ' '.join(important_words[:4])
            if search_query:
                result = self.web_search(search_query)
                return f"Based on web search:\n\n{result}"

        if needs_math:
            for expr in re.findall(r'[\d+\-*/().\s=x]+', question):
                candidate = expr.strip()
                # The pattern also matches lone punctuation (e.g. the '-' in
                # "well-known"); only fragments containing a digit qualify.
                if not re.search(r'\d', candidate):
                    continue
                if any(op in candidate for op in ('+', '-', '*', '/', '=')):
                    result = self.math_calculator(candidate)
                    return f"Mathematical calculation:\n{result}"

        # Fallback: ask for more information.
        return f"I need more specific information to answer: {question[:100]}... Please provide additional context or rephrase your question."
def cleanup_memory():
    """Release unused Python objects and, when CUDA is present, cached GPU memory.

    Garbage is collected first because ``torch.cuda.empty_cache()`` can only
    return blocks whose tensors have already been freed.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print("🧹 GPU memory cleared")
def _truncate(text: str, limit: int) -> str:
    """Return ``text`` cut to ``limit`` characters, with '...' appended only if cut."""
    return text[:limit] + "..." if len(text) > limit else text


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with ``SmartAgent``, and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when the
            user is not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with "Task ID", "Question" and "Answer" columns,
        or ``None`` when the run stopped before any question was processed.
    """
    if not profile:
        return "❌ Please Login to Hugging Face first.", None

    username = f"{profile.username}"
    print(f"👤 User: {username}")

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        print("Warning: SPACE_ID is not set; the submitted agent_code link will not resolve.")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Fetch questions first: this is cheap, and failing here avoids loading
    # the model for nothing.
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"❌ Error fetching questions: {e}", None
    if not isinstance(questions_data, list) or not questions_data:
        return "❌ Error fetching questions: the questions list is empty or malformed.", None
    print(f"📋 Fetched {len(questions_data)} questions")

    cleanup_memory()

    # Initialize agent
    try:
        agent = SmartAgent()
    except Exception as e:
        print(f"❌ Agent initialization failed: {e}")
        return f"Failed to initialize agent: {e}", None

    results_log = []
    answers_payload = []
    total_questions = len(questions_data)

    for i, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue

        print(f"\n🔄 Question {i}/{total_questions}: {task_id}")
        print(f"Q: {question_text[:150]}...")

        try:
            answer = agent(question_text)
            # Ensure the answer is not empty or trivially short.
            if not answer or len(answer.strip()) < 3:
                answer = f"Unable to process question: {question_text[:50]}..."
            print(f"✅ A: {answer[:100]}...")
        except Exception as e:
            # One failing question must not abort the whole run.
            print(f"❌ Error on {task_id}: {e}")
            answer = f"Processing error: {str(e)[:100]}"

        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": answer,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": _truncate(question_text, 100),
            "Answer": _truncate(answer, 150),
        })

        # Memory cleanup every 3 questions
        if i % 3 == 0:
            cleanup_memory()

    if not answers_payload:
        return "❌ Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    print(f"\n📤 Submitting {len(answers_payload)} answers...")

    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()

        # `or 0` also covers an explicit null score in the response.
        score = result_data.get('score') or 0
        correct = result_data.get('correct_count', 0)
        total = result_data.get('total_attempted', len(answers_payload))
        verdict = 'ACHIEVED!' if score >= 30 else 'Need improvement'

        final_status = (
            "🎉 Submission Complete!\n"
            f"👤 User: {result_data.get('username')}\n"
            f"📊 Score: {score}% ({correct}/{total} correct)\n"
            f"💬 {result_data.get('message', 'No message')}\n"
            f"Target: 30%+ → {verdict}"
        )
        print(f"✅ Final Score: {score}%")
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        error_msg = f"❌ Submission failed: {str(e)}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)
# --- Gradio UI --- | |
# Page layout: header and feature list, login row, run button, then the
# status box and results table that run_and_submit_all fills in.
with gr.Blocks(title="Optimized Agent Evaluation", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 Optimized Agent for 16GB Memory")
    gr.Markdown("""
    **Target: 30%+ Score**

    **Optimizations:**
    - 🧠 Better model selection (flan-t5-large)
    - 🔍 Enhanced web search with DuckDuckGo
    - 🧮 Advanced math calculator with SymPy
    - 🎯 Improved question analysis and routing
    - 💾 Memory management for 16GB systems
    - 🔧 Robust error handling and fallbacks
    """)

    with gr.Row():
        gr.LoginButton(scale=1)

    with gr.Row():
        run_button = gr.Button(
            "🚀 Run Optimized Evaluation",
            variant="primary",
            size="lg",
            scale=2,
        )

    status_output = gr.Textbox(
        label="📊 Status & Results",
        lines=10,
        interactive=False,
        placeholder="Ready to run evaluation...",
    )
    results_table = gr.DataFrame(
        label="📋 Detailed Results",
        wrap=True,
    )

    # No explicit inputs: the handler's only parameter is the OAuth profile.
    # NOTE(review): presumably Gradio injects it from the type annotation; confirm.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
if __name__ == "__main__":
    print("🚀 Starting Optimized Agent for 16GB Memory...")
    # Listen on all interfaces on port 7860 and show handler errors in the UI.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )