Spaces: commit "test 2" (build error)
Files changed: app.py (+167 -72), requirements.txt (+3 -1)
app.py
CHANGED
@@ -10,35 +10,41 @@ from llama_index.embeddings import HuggingFaceEmbedding
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 from dotenv import load_dotenv
+from typing import Optional, Dict, Any
+import yt_dlp
+from transformers import AutoProcessor, AutoModelForVision2Seq
+from PIL import Image
+import re
 
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
+
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         load_dotenv()
-
+
         # Use official Mistral model
         model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # or "mistralai/Mistral-7B-Instruct-v0.2"
-
+
         # Configure quantization for efficient CPU usage
         quantization_config = BitsAndBytesConfig(
             load_in_8bit=True,  # 8-bit is more CPU friendly than 4-bit
-            bnb_4bit_compute_dtype=torch.float16
+            bnb_4bit_compute_dtype=torch.float16,
         )
-
+
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
         self.model = AutoModelForCausalLM.from_pretrained(
             model_name,
             low_cpu_mem_usage=True,
             trust_remote_code=True,
-            quantization_config=quantization_config
+            quantization_config=quantization_config,
         )
-
+
         # Create HuggingFaceLLM instance
         self.llm = HuggingFaceLLM(
             context_window=4096,
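Note on the hunk above: `bnb_4bit_compute_dtype` only takes effect when `load_in_4bit=True`, so it is inert under `load_in_8bit=True`, and bitsandbytes kernels are generally CUDA-only, which makes the "CPU friendly" comment optimistic on a CPU Space. A minimal standalone sketch of the same load, assuming a CUDA runtime:

```python
# Minimal sketch of the quantized load above, assuming a CUDA runtime
# (bitsandbytes 8-bit kernels generally require a GPU).
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # the smaller fallback named in the diff

quant_cfg = BitsAndBytesConfig(load_in_8bit=True)  # bnb_4bit_* options are ignored in 8-bit mode

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",       # layer placement; requires accelerate (already in requirements.txt)
    low_cpu_mem_usage=True,  # stream weights instead of materializing them twice
    quantization_config=quant_cfg,
)
```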
@@ -47,67 +53,135 @@ class BasicAgent:
             model=self.model,
             model_name=model_name,
             device_map="auto",
-            temperature=0.1
+            temperature=0.1,
         )
-
-
+
+        # Initialize vision model for image/video understanding
+        self.vision_processor = AutoProcessor.from_pretrained(
+            "microsoft/kosmos-2-patch14-224"
+        )
+        self.vision_model = AutoModelForVision2Seq.from_pretrained(
+            "microsoft/kosmos-2-patch14-224"
+        )
+
+        # Initialize tools
+        self.tools = {
+            "analyze_image": self.analyze_image,
+            "analyze_video": self.analyze_video,
+            "analyze_data": self.analyze_data,
+        }
+
+        print("Agent initialized with multimodal capabilities.")
+
+    def analyze_image(self, image_url: str) -> str:
+        try:
+            response = requests.get(image_url)
+            image = Image.open(BytesIO(response.content))
+            inputs = self.vision_processor(images=image, return_tensors="pt")
+            outputs = self.vision_model.generate(
+                pixel_values=inputs["pixel_values"], max_length=128, num_beams=5
+            )
+            return self.vision_processor.batch_decode(
+                outputs, skip_special_tokens=True
+            )[0]
+        except Exception as e:
+            print(f"Error analyzing image: {e}")
+            return "Error analyzing image"
+
+    def analyze_video(self, video_url: str) -> str:
+        try:
+            # Extract video info using yt-dlp
+            ydl_opts = {
+                "format": "worst",  # Lowest quality to save bandwidth
+                "extract_flat": True,
+                "quiet": True,
+            }
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                info = ydl.extract_info(video_url, download=False)
+
+            # Extract relevant information
+            title = info.get("title", "")
+            description = info.get("description", "")
+            duration = info.get("duration", 0)
+
+            # Create prompt for the LLM
+            video_context = f"""
+            Video Title: {title}
+            Duration: {duration} seconds
+            Description: {description}
+            """
+
+            return self.generate_response(video_context)
+        except Exception as e:
+            print(f"Error analyzing video: {e}")
+            return "Error analyzing video"
+
+    def analyze_data(self, df: pd.DataFrame, question: str) -> str:
+        try:
+            prompt = f"""Analyze this DataFrame:
+            Columns: {', '.join(df.columns)}
+            Sample: {df.head().to_string()}
+            Question: {question}
+            Provide only the numerical answer or specific value."""
+
+            return self.generate_response(prompt)
+        except Exception as e:
+            print(f"Error analyzing data: {e}")
+            return "Error analyzing data"
+
+    def generate_response(self, prompt: str) -> str:
+        try:
+            response = self.llm.complete(prompt)
+            return response.text.strip()
+        except Exception as e:
+            print(f"Error generating response: {e}")
+            return "Error generating response"
 
     def __call__(self, question: str) -> str:
         print(f"Agent received question: {question[:50]}...")
-
+
         try:
-            # …
-            if "```python" in question:
-                data_start = question.find("```python")
-                data_end = question.find("```", data_start + 8)
-                data_code = question[data_start + 8:data_end].strip()
-
-                # Execute the data code in a safe context to create DataFrame
+            # Check for data analysis task
+            if "```python" in question:
+                data_start = question.find("```python")
+                data_end = question.find("```", data_start + 9)  # 9 == len("```python")
+                data_code = question[data_start + 9 : data_end].strip()
                 local_vars = {}
                 exec(data_code, {"pd": pd}, local_vars)
-                df = local_vars.get("df")
-                # … (old lines 69-86 were deleted here; their text is not preserved in this capture)
-                # Get response from model
-                response = self.llm.complete(prompt)
-                return response.text.strip()
-
-            # For non-DataFrame questions or if no DataFrame is found
-            prompt = f"""Answer this question clearly and concisely: {question}
-            Provide only the specific answer requested, no explanations."""
-            response = self.llm.complete(prompt)
-            return response.text.strip()
-
+                df = local_vars.get("df")
+                actual_question = question[data_end + 3 :].strip()
+                return self.analyze_data(df, actual_question)
+
+            # Check for video analysis task
+            video_pattern = r"https?://(?:www\.)?youtube\.com/\S+"
+            video_match = re.search(video_pattern, question)
+            if video_match:
+                return self.analyze_video(video_match.group(0))
+
+            # Check for image analysis task
+            image_pattern = r"https?://\S+\.(?:jpg|jpeg|png|gif)"
+            image_match = re.search(image_pattern, question)
+            if image_match:
+                return self.analyze_image(image_match.group(0))
+
+            # General question
+            return self.generate_response(question)
+
         except Exception as e:
             print(f"Error processing question: {e}")
             return "Error occurred while processing the question"
 
-def run_and_submit_all( profile: gr.OAuthProfile | None):
+
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
 
     if profile:
-        username= f"{profile.username}"
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
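Two observations on the new methods above. `analyze_image` calls `BytesIO`, which is not among the imports this commit adds; unless it is already imported in the unchanged top of the file, `from io import BytesIO` is needed. Kosmos-2 is also normally driven with a text prompt alongside the image rather than `pixel_values` alone. A sketch following the pattern on the Kosmos-2 model card (`caption_image` is a hypothetical helper, not part of the commit):

```python
# Sketch of the image path following the Kosmos-2 model card pattern.
from io import BytesIO  # missing from the diff's added imports

import requests
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

processor = AutoProcessor.from_pretrained("microsoft/kosmos-2-patch14-224")
model = AutoModelForVision2Seq.from_pretrained("microsoft/kosmos-2-patch14-224")

def caption_image(image_url: str) -> str:
    image = Image.open(BytesIO(requests.get(image_url, timeout=30).content))
    # Kosmos-2 expects a text prompt together with the image
    inputs = processor(text="<grounding>An image of", images=image, return_tensors="pt")
    generated_ids = model.generate(
        pixel_values=inputs["pixel_values"],
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        image_embeds_position_mask=inputs["image_embeds_position_mask"],
        max_new_tokens=64,
    )
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    caption, _entities = processor.post_process_generation(text)
    return caption
```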
@@ -134,16 +208,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
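Stepping back to the `__call__` routing in the earlier hunk: it slices the question with hand-counted offsets around the ```python fence, which is easy to get wrong by one character. A regex keeps the arithmetic out of it; `split_code_question` is a hypothetical helper, and `exec` on question-supplied code remains unsandboxed either way (restricting the globals dict, as the diff does, is not a real sandbox):

```python
# Hypothetical helper: pull a fenced ```python block and the text after it.
import re

FENCE_RE = re.compile(r"```python\s*(.*?)```(.*)", re.DOTALL)

def split_code_question(question: str) -> tuple[str, str] | None:
    match = FENCE_RE.search(question)
    if not match:
        return None  # no fenced block present
    code, remainder = match.groups()
    return code.strip(), remainder.strip()
```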
@@ -160,18 +234,36 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             continue
         try:
             submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            answers_payload.append(
+                {"task_id": task_id, "submitted_answer": submitted_answer}
+            )
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": submitted_answer,
+                }
+            )
         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": f"AGENT ERROR: {e}",
+                }
+            )
 
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    # 4. Prepare Submission
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload,
+    }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
 
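For reference, the payload assembled above has this shape; the values are illustrative, and `agent_code` is presumably the Space's /tree/main URL built in an unchanged part of the file (the startup logging later in the diff prints the same URL):

```python
# Illustrative literal of the submission payload built above (values are placeholders).
submission_data = {
    "username": "example-user",  # from the OAuth profile
    "agent_code": "https://huggingface.co/spaces/example-user/example-space/tree/main",
    "answers": [
        {"task_id": "task-001", "submitted_answer": "42"},  # one entry per question
    ],
}
```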
@@ -241,20 +333,19 @@ with gr.Blocks() as demo:
 
     run_button = gr.Button("Run Evaluation & Submit All Answers")
 
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False
+    )
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
 
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
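The `run_button.click` call above lists no `inputs`; Gradio injects the `gr.OAuthProfile` argument automatically from the function's type annotation, passing `None` when nobody is logged in. A condensed, self-contained sketch of the wiring, with a stub standing in for the real function:

```python
# Condensed sketch of the Blocks wiring above; run_and_submit_all is stubbed.
import gradio as gr
import pandas as pd

def run_and_submit_all(profile: gr.OAuthProfile | None):
    # Stand-in for the real function defined in app.py.
    return "status", pd.DataFrame()

with gr.Blocks() as demo:
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result", lines=5, interactive=False
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # No inputs listed: Gradio supplies the OAuth profile from the annotation.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
```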
@@ -262,14 +353,18 @@ if __name__ == "__main__":
     else:
         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+        print(
+            f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+        )
     else:
-        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+        print(
+            "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+        )
 
-    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("-" * (60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.launch(debug=True, share=False)
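One more note on `analyze_video` from earlier: fetching only metadata (`download=False`) is cheap, but `extract_flat` is mainly intended for playlists and can leave fields such as `description` unresolved for a single video. A hypothetical standalone version without it:

```python
# Hypothetical standalone version of the metadata-only lookup in analyze_video.
import yt_dlp

def video_metadata(url: str) -> dict:
    ydl_opts = {"format": "worst", "quiet": True}  # nothing is downloaded below
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)  # metadata only
    return {
        "title": info.get("title", ""),
        "duration": info.get("duration", 0),
        "description": info.get("description", ""),
    }
```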
requirements.txt
CHANGED
@@ -7,4 +7,6 @@ torch
 sentence-transformers
 nltk
 accelerate
-bitsandbytes>=0.41.0
+bitsandbytes>=0.41.0
+yt-dlp
+Pillow
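The new entries line up with the new code paths: bitsandbytes>=0.41.0 backs the 8-bit load (and generally needs a CUDA runtime, per the note above), yt-dlp backs `analyze_video`, and Pillow backs `analyze_image`. Only bitsandbytes carries a version constraint; pinning the other two would make builds more reproducible, which is worth considering given the Space's "Build error" status.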