# app.py
import os
import traceback

import gradio as gr
import pandas as pd
import requests
import torch

from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.huggingface import HuggingFaceLLM
# Import optional tool dependencies; degrade gracefully if missing
try:
    from duckduckgo_search import DDGS
except ImportError:
    print("Warning: duckduckgo_search not installed. Web search will be limited.")
    DDGS = None

try:
    from sympy import sympify
    from sympy.core.sympify import SympifyError
except ImportError:
    print("Warning: sympy not installed. Math calculator will be limited.")
    sympify = None
    SympifyError = Exception
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Advanced Agent Definition ---
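# SmartAgent wraps a small local LLM in a LlamaIndex ReActAgent with two real
# tools (DuckDuckGo web search and a SymPy calculator); if the ReAct agent
# cannot be created, __call__ falls back to simple keyword-based tool routing.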
class SmartAgent:
    def __init__(self):
        print("Initializing Local LLM Agent...")

        # Check available memory and CUDA
        if torch.cuda.is_available():
            print(f"CUDA available. GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
        else:
            print("CUDA not available, using CPU")

        # Prefer smaller, memory-efficient models on Hugging Face Spaces
        model_options = [
            "microsoft/DialoGPT-medium",    # much smaller, works well for chat
            "google/flan-t5-base",          # good for reasoning tasks
            "HuggingFaceH4/zephyr-7b-beta"  # original choice (may fail under limited memory)
        ]
        model_name = model_options[1]  # start with flan-t5-base
        print(f"Attempting to load model: {model_name}")
        try:
            # Initialize with memory-efficient settings
            self.llm = HuggingFaceLLM(
                model_name=model_name,
                tokenizer_name=model_name,
                context_window=512,  # reduced context window
                max_new_tokens=128,  # reduced max tokens
                generate_kwargs={
                    "temperature": 0.7,
                    "do_sample": True,
                    "pad_token_id": 0  # prevent padding issues
                },
                device_map="auto",
                # Memory optimization parameters
                model_kwargs={
                    "torch_dtype": torch.float16,  # use half precision
                    "low_cpu_mem_usage": True,
                    "load_in_8bit": True,  # 8-bit quantization, if bitsandbytes is available
                }
            )
            print(f"Successfully loaded model: {model_name}")
        except Exception as e:
            print(f"Failed to load {model_name}: {e}")
            # Fall back to an even smaller model
            try:
                fallback_model = "microsoft/DialoGPT-small"
                print(f"Falling back to: {fallback_model}")
                self.llm = HuggingFaceLLM(
                    model_name=fallback_model,
                    tokenizer_name=fallback_model,
                    context_window=256,
                    max_new_tokens=64,
                    generate_kwargs={"temperature": 0.7, "do_sample": True},
                    device_map="cpu",  # force CPU to avoid memory issues
                    model_kwargs={"low_cpu_mem_usage": True}
                )
                print(f"Successfully loaded fallback model: {fallback_model}")
            except Exception as e2:
                print(f"All model loading attempts failed: {e2}")
                raise RuntimeError("Unable to load any language model") from e2
        # Define tools with real implementations
        self.tools = [
            FunctionTool.from_defaults(
                fn=self.web_search,
                name="web_search",
                description="Searches the web for current information using DuckDuckGo when questions require up-to-date knowledge"
            ),
            FunctionTool.from_defaults(
                fn=self.math_calculator,
                name="math_calculator",
                description="Performs mathematical calculations and symbolic math using SymPy when questions involve numbers or equations"
            )
        ]

        # Create the ReAct agent with tools
        try:
            self.agent = ReActAgent.from_tools(
                tools=self.tools,
                llm=self.llm,
                verbose=True,
                max_iterations=3  # limit iterations to prevent infinite loops
            )
            print("Local LLM Agent initialized successfully.")
        except Exception as e:
            print(f"Error creating ReAct agent: {e}")
            # Fall back to direct tool calling in __call__
            self.agent = None
            print("Using fallback direct tool calling approach")
    def web_search(self, query: str) -> str:
        """Real web search using DuckDuckGo."""
        print(f"Web search triggered for: {query[:50]}...")
        if not DDGS:
            return "Web search unavailable - duckduckgo_search not installed"
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=3))
            if results:
                formatted_results = []
                for i, r in enumerate(results, 1):
                    title = r.get('title', 'No title')
                    body = r.get('body', 'No description')[:200]
                    url = r.get('href', '')
                    formatted_results.append(f"{i}. {title}\n{body}...\nSource: {url}")
                return "\n\n".join(formatted_results)
            else:
                return "No search results found for the query."
        except Exception as e:
            print(f"Web search error: {e}")
            return f"Error during web search: {str(e)}"
    def math_calculator(self, expression: str) -> str:
        """Safe math evaluation using SymPy."""
        print(f"Math calculation triggered for: {expression}")
        if not sympify:
            # Fallback: eval with a character whitelist (still not fully safe)
            try:
                allowed_chars = set('0123456789+-*/().^ ')
                if not all(c in allowed_chars for c in expression):
                    return "Error: Only basic math operations are allowed"
                result = eval(expression.replace('^', '**'))
                return str(result)
            except Exception as e:
                return f"Error: Could not evaluate the mathematical expression - {str(e)}"
        try:
            # Use SymPy for safe evaluation
            result = sympify(expression).evalf()
            return str(result)
        except SympifyError as e:
            return f"Error: Could not parse the mathematical expression - {str(e)}"
        except Exception as e:
            return f"Error: Calculation failed - {str(e)}"
    def __call__(self, question: str) -> str:
        print(f"Processing question (first 50 chars): {question[:50]}...")
        try:
            if self.agent:
                response = self.agent.query(question)
                return str(response)

            # Fallback: route directly to a tool based on question content
            question_lower = question.lower()
            if any(word in question_lower for word in ['calculate', 'math', 'equation', '+', '-', '*', '/', '=']):
                # Collect the math-like tokens and try the calculator
                math_terms = [
                    word for word in question.split()
                    if any(char in word for char in '0123456789+-*/()')
                ]
                if math_terms:
                    expression = ' '.join(math_terms)
                    return self.math_calculator(expression)
            if any(word in question_lower for word in ['search', 'find', 'what is', 'current', 'latest', 'news']):
                return self.web_search(question)

            # Default response
            return (
                f"I understand you're asking: {question[:100]}... However, I'm having trouble "
                "processing this with the current model configuration. Please try rephrasing "
                "your question or breaking it into smaller parts."
            )
        except Exception as e:
            print(f"Agent error: {str(e)}")
            print(f"Full traceback: {traceback.format_exc()}")
            return f"Error processing question: {str(e)}"
# --- Memory Cleanup ---
def cleanup_memory():
    """Clean up GPU memory."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print("GPU memory cleared")
# --- Submission Logic ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the agent on them, submits all answers,
    and displays the results.
    """
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = profile.username
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Clean memory before starting
    cleanup_memory()

    # Instantiate the agent
    try:
        agent = SmartAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        print(f"Full traceback: {traceback.format_exc()}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code URL: {agent_code}")

    # Fetch questions. JSONDecodeError is a subclass of RequestException in
    # requests, so it must be caught first or its handler is unreachable.
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None
    # Run the agent on all questions
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")

    for i, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        print(f"Processing question {i}/{len(questions_data)}: {task_id}")
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": (question_text[:100] + "...") if len(question_text) > 100 else question_text,
                "Submitted Answer": (submitted_answer[:200] + "...") if len(submitted_answer) > 200 else submitted_answer
            })
            print(f"✅ Completed question {i}: {task_id}")
            # Clean memory every 5 questions
            if i % 5 == 0:
                cleanup_memory()
        except Exception as e:
            print(f"❌ Error running agent on task {task_id}: {e}")
            error_answer = f"AGENT ERROR: {str(e)}"
            answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": (question_text[:100] + "...") if len(question_text) > 100 else question_text,
                "Submitted Answer": error_answer
            })

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
    # Prepare submission
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload
    }
    status_update = f"Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # Submit answers
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"🎉 Submission Successful!\n\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("✅ Submission successful!")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"❌ Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "❌ Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"❌ Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"❌ An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
# --- Gradio UI ---
with gr.Blocks(title="Local LLM Agent Evaluation") as demo:
    gr.Markdown("# 🤖 Local LLM Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. 🔐 Log in to your Hugging Face account using the button below
        2. 🚀 Click 'Run Evaluation & Submit All Answers'
        3. ⏳ Wait for the local LLM to process all questions (using a memory-optimized smaller model)
        4. 📊 View your results and submission status

        **Features:**

        - 🔍 Real web search using DuckDuckGo
        - 🧮 Advanced math calculations with SymPy
        - 🧠 Memory-optimized language model with fallback options
        - 🛡️ Error handling and recovery mechanisms
        """
    )

    with gr.Row():
        gr.LoginButton()

    with gr.Row():
        run_button = gr.Button(
            "🚀 Run Evaluation & Submit All Answers",
            variant="primary",
            size="lg"
        )

    status_output = gr.Textbox(
        label="📋 Run Status / Submission Result",
        lines=8,
        interactive=False,
        placeholder="Click the button above to start the evaluation..."
    )
    results_table = gr.DataFrame(
        label="📊 Questions and Agent Answers",
        wrap=True,
        interactive=False
    )
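    # Note: run_and_submit_all declares a gr.OAuthProfile parameter, which Gradio
    # injects automatically for OAuth-enabled Spaces, so it is not listed in inputs.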
    # Wire up the button
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
if __name__ == "__main__":
    print("\n" + "=" * 60)
    print("🚀 Application Startup at", pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 60)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f"   Runtime URL should be: https://{space_host_startup}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?).")

    print("-" * 60)
    print("🎯 Launching Gradio Interface for Local LLM Agent Evaluation...")

    # Launch without share=True (not needed on Hugging Face Spaces)
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )