File size: 18,126 Bytes
574b6ca
 
 
c913a81
 
d591a7a
086b425
c913a81
 
 
 
8c139ea
c913a81
 
 
757ebd9
d66e9b7
c913a81
d591a7a
c913a81
 
 
 
 
 
 
 
 
 
 
 
 
aa6f3a8
c913a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa6f3a8
c913a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa6f3a8
c913a81
 
 
 
 
 
 
aa6f3a8
c913a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d66e9b7
c913a81
 
 
 
d66e9b7
c913a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d591a7a
c913a81
d66e9b7
c913a81
 
 
 
 
 
d66e9b7
c913a81
d66e9b7
c913a81
 
 
 
 
 
d591a7a
c913a81
8c139ea
c913a81
 
 
d591a7a
c913a81
d66e9b7
c913a81
 
 
 
d591a7a
c913a81
 
 
 
 
 
 
 
d66e9b7
c913a81
 
d66e9b7
c913a81
d66e9b7
c913a81
 
 
 
 
 
 
 
 
 
 
 
d66e9b7
 
c913a81
 
 
19b7914
c913a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eccf8e4
c913a81
aa6f3a8
d66e9b7
aa6f3a8
c913a81
 
 
 
 
 
 
 
 
a39e119
c913a81
 
 
 
 
 
 
8c139ea
c913a81
bbb34b9
c913a81
8c139ea
c913a81
 
f96a820
8c139ea
c913a81
d3c0517
c913a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d66e9b7
d3c0517
c913a81
 
 
e80aab9
c913a81
aa6f3a8
c913a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7963312
c913a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7963312
d66e9b7
 
c913a81
 
 
 
 
 
 
 
 
9f29ca9
c913a81
 
 
 
 
 
 
 
aa6f3a8
d66e9b7
e80aab9
 
c913a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
import os
import gradio as gr
import requests
import inspect
import pandas as pd
import json
import re
from typing import Dict, List, Any, Optional
import urllib.parse
from datetime import datetime
import math

# Transformers and torch imports
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# --- Constants ---
# Base URL of the scoring service. run_and_submit_all() appends "/questions"
# and "/submit" to it to build the two endpoints it calls.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

class EnhancedGAIAAgent:
    def __init__(self):
        print("Initializing Enhanced GAIA Agent with Mistral-7B...")
        
        # Initialize Mistral model
        try:
            print("Loading Mistral-7B-Instruct model...")
            self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
            self.model = AutoModelForCausalLM.from_pretrained(
                "mistralai/Mistral-7B-Instruct-v0.3",
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None
            )
            
            # Create pipeline for easier use
            self.pipe = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                max_new_tokens=512,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )
            print("โœ… Mistral model loaded successfully!")
            
        except Exception as e:
            print(f"โŒ Error loading Mistral model: {e}")
            print("Falling back to basic responses...")
            self.pipe = None
            
        # Tool functions for GAIA tasks
        self.tools = {
            "calculate": self._calculate,
            "search_web": self._search_web,
            "parse_data": self._parse_data,
            "analyze_text": self._analyze_text,
            "solve_math": self._solve_math
        }
        
    def _calculate(self, expression: str) -> str:
        """Safe calculator for mathematical expressions"""
        try:
            # Clean and validate expression
            expression = re.sub(r'[^0-9+\-*/().\s]', '', expression)
            result = eval(expression)
            return str(result)
        except Exception as e:
            return f"Calculation error: {e}"
    
    def _search_web(self, query: str) -> str:
        """Simulate web search (placeholder - you'd integrate real search API)"""
        # This is a placeholder - integrate with actual search API
        return f"Search results for '{query}': [This would contain real search results]"
    
    def _parse_data(self, data: str) -> str:
        """Parse and analyze structured data"""
        try:
            # Try to parse as JSON
            if data.strip().startswith('{') or data.strip().startswith('['):
                parsed = json.loads(data)
                return f"Parsed data structure with {len(parsed) if isinstance(parsed, (list, dict)) else 1} elements"
            else:
                # Basic text analysis
                lines = data.split('\n')
                return f"Text data with {len(lines)} lines, {len(data.split())} words"
        except Exception as e:
            return f"Data parsing error: {e}"
    
    def _analyze_text(self, text: str) -> str:
        """Analyze text content"""
        words = text.split()
        sentences = text.split('.')
        return f"Text analysis: {len(words)} words, {len(sentences)} sentences"
    
    def _solve_math(self, problem: str) -> str:
        """Enhanced math problem solver"""
        try:
            # Extract numbers and operations
            numbers = re.findall(r'-?\d+\.?\d*', problem)
            
            # Handle common math patterns
            if "percent" in problem.lower() or "%" in problem:
                if len(numbers) >= 2:
                    base = float(numbers[0])
                    percent = float(numbers[1])
                    result = base * (percent / 100)
                    return str(result)
            
            if "average" in problem.lower() or "mean" in problem.lower():
                if numbers:
                    nums = [float(n) for n in numbers]
                    return str(sum(nums) / len(nums))
            
            # Default calculation
            return self._calculate(" ".join(numbers))
            
        except Exception as e:
            return f"Math solving error: {e}"
    
    def _generate_response(self, prompt: str) -> str:
        """Generate response using Mistral model"""
        if not self.pipe:
            return "Model not available - using fallback response."
        
        try:
            messages = [
                {"role": "user", "content": prompt}
            ]
            
            response = self.pipe(messages, max_new_tokens=512, temperature=0.7)
            
            # Extract the generated text
            if response and len(response) > 0:
                generated_text = response[0]['generated_text']
                # Get only the assistant's response (after the user message)
                if isinstance(generated_text, list):
                    # Find the assistant's response
                    for msg in generated_text:
                        if msg.get('role') == 'assistant':
                            return msg.get('content', '')
                elif isinstance(generated_text, str):
                    return generated_text
                else:
                    return str(generated_text)
            
            return "No response generated."
            
        except Exception as e:
            print(f"Error generating response: {e}")
            return f"Error in response generation: {e}"
    
    def _detect_task_type(self, question: str) -> str:
        """Detect the type of task to apply appropriate strategy"""
        question_lower = question.lower()
        
        if any(word in question_lower for word in ["calculate", "compute", "math", "+", "-", "*", "/", "="]):
            return "calculation"
        elif any(word in question_lower for word in ["search", "find", "lookup", "google"]):
            return "search"
        elif any(word in question_lower for word in ["data", "csv", "json", "table", "parse"]):
            return "data_analysis"
        elif any(word in question_lower for word in ["percent", "%", "average", "mean", "sum"]):
            return "math_word_problem"
        else:
            return "general_reasoning"
    
    def __call__(self, question: str) -> str:
        print(f"Agent processing question (first 100 chars): {question[:100]}...")
        
        # Detect task type
        task_type = self._detect_task_type(question)
        print(f"Detected task type: {task_type}")
        
        # Build enhanced prompt based on task type
        if task_type == "calculation":
            enhanced_prompt = f"""
You are a precise mathematical assistant. Solve this step-by-step:

Question: {question}

Provide a clear, accurate answer. If calculation is needed, show your work.
Answer:"""
            
        elif task_type == "math_word_problem":
            enhanced_prompt = f"""
You are solving a math word problem. Break it down step by step:

Question: {question}

Steps:
1. Identify what is being asked
2. Extract the relevant numbers
3. Determine the operation needed
4. Calculate the result
5. Provide the final answer

Answer:"""
            
        elif task_type == "data_analysis":
            enhanced_prompt = f"""
You are analyzing data. Approach this systematically:

Question: {question}

Consider:
- What type of data is involved?
- What analysis is needed?
- What tools or methods should be used?

Provide a clear, structured answer.
Answer:"""
            
        else:
            enhanced_prompt = f"""
You are a helpful assistant that provides accurate, well-reasoned answers.

Question: {question}

Think through this step-by-step and provide a clear, comprehensive answer.
Answer:"""
        
        # Generate response using the model
        try:
            response = self._generate_response(enhanced_prompt)
            
            # Post-process response for specific task types
            if task_type in ["calculation", "math_word_problem"]:
                # Try to extract and verify any calculations
                numbers_in_response = re.findall(r'-?\d+\.?\d*', response)
                if numbers_in_response:
                    # Attempt to verify calculation if simple enough
                    pass
            
            print(f"Agent returning response (first 100 chars): {response[:100]}...")
            return response.strip()
            
        except Exception as e:
            print(f"Error in agent processing: {e}")
            fallback_response = self._handle_fallback(question, task_type)
            return fallback_response
    
    def _handle_fallback(self, question: str, task_type: str) -> str:
        """Provide fallback responses when the main model fails"""
        if task_type == "calculation":
            # Try to extract and calculate simple expressions
            try:
                numbers = re.findall(r'-?\d+\.?\d*', question)
                if len(numbers) >= 2:
                    if "+" in question:
                        result = sum(float(n) for n in numbers)
                        return f"The sum is {result}"
                    elif "*" in question or "multiply" in question.lower():
                        result = 1
                        for n in numbers:
                            result *= float(n)
                        return f"The product is {result}"
            except:
                pass
        
        return f"I understand you're asking about: {question}. This appears to be a {task_type} task. Let me provide my best analysis based on the available information."


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the EnhancedGAIAAgent on them, submit all answers,
    and report the results.

    Args:
        profile: The logged-in Hugging Face user, or ``None`` when nobody is
            logged in (in which case nothing is run).

    Returns:
        A ``(status, table)`` pair: ``status`` is a human-readable message and
        ``table`` is a ``pandas.DataFrame`` of per-question results, or
        ``None`` when the run stopped before any question was processed.
    """

    def _truncate(text: str, limit: int) -> str:
        """Shorten ``text`` to ``limit`` characters plus "..." for display."""
        return text[:limit] + "..." if len(text) > limit else text

    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Enhanced Agent
    try:
        print("Initializing Enhanced GAIA Agent...")
        agent = EnhancedGAIAAgent()
        print("โœ… Agent initialized successfully!")
    except Exception as e:
        print(f"โŒ Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code URL: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"โœ… Fetched {len(questions_data)} questions.")
    # JSONDecodeError derives from RequestException, so it has to be listed
    # first; after the base class this handler could never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"โŒ Error decoding JSON response from questions endpoint: {e}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"โŒ Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"โŒ An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run Enhanced Agent
    results_log = []
    answers_payload = []
    print(f"๐Ÿš€ Running enhanced agent on {len(questions_data)} questions...")

    for i, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")

        if not task_id or question_text is None:
            print(f"โš ๏ธ  Skipping item with missing task_id or question: {item}")
            continue

        print(f"๐Ÿ“ Processing question {i}/{len(questions_data)} (ID: {task_id})")

        # Only the agent call is guarded, so each task contributes exactly one
        # answer; a failure while building the log row can no longer append a
        # second "AGENT ERROR" answer for a task that was already answered.
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            print(f"โŒ Error running agent on task {task_id}: {e}")
            submitted_answer = f"AGENT ERROR: {e}"
            logged_answer = submitted_answer
        else:
            logged_answer = _truncate(str(submitted_answer), 300)
            print(f"โœ… Completed question {i}")

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": _truncate(str(question_text), 200),
            "Submitted Answer": logged_answer
        })

    if not answers_payload:
        print("โŒ Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload
    }

    print(f"๐Ÿ“ค Submitting {len(answers_payload)} answers for user '{username}'...")

    # 5. Submit
    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)  # Increased timeout
        response.raise_for_status()
        result_data = response.json()

        final_status = (
            f"๐ŸŽ‰ Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )

        print("โœ… Submission successful!")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df

    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        # Prefer the JSON "detail" field; fall back to the raw body text.
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"โŒ Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df

    except Exception as e:
        status_message = f"โŒ An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df


# --- Build Gradio Interface using Blocks ---
# Module-level UI definition. The Blocks app is bound to `demo`, which the
# `__main__` guard at the bottom of the file launches. Components appear in
# the order they are created here.
with gr.Blocks(title="Enhanced GAIA Agent") as demo:
    gr.Markdown("# ๐Ÿš€ Enhanced GAIA Agent with Mistral-7B")
    # NOTE(review): the text advertises "Tool Integration", but the `tools`
    # table built in EnhancedGAIAAgent.__init__ is never read by __call__.
    gr.Markdown(
        """
        **Enhanced Features:**
        - ๐Ÿง  **Mistral-7B-Instruct** for advanced reasoning
        - ๐Ÿ”ง **Tool Integration** for calculations and data processing  
        - ๐Ÿ“Š **Task Type Detection** for optimized responses
        - ๐ŸŽฏ **GAIA-Optimized** prompting strategies
        
        **Instructions:**
        1. Clone this space and ensure you have access to Mistral-7B-Instruct
        2. Log in to your Hugging Face account using the button below
        3. Click 'Run Enhanced Evaluation' to process all questions with the enhanced agent
        
        **Note:** The enhanced agent uses Mistral-7B which requires significant computational resources.
        Processing may take several minutes depending on the number of questions.
        """
    )

    # Login control; run_and_submit_all refuses to run without a profile.
    with gr.Row():
        gr.LoginButton()
        
    with gr.Row():
        run_button = gr.Button("๐Ÿš€ Run Enhanced Evaluation & Submit All Answers", variant="primary")

    # Receives the status string (first element returned by run_and_submit_all).
    status_output = gr.Textbox(
        label="๐Ÿ“Š Run Status / Submission Result", 
        lines=8, 
        interactive=False
    )
    
    # Receives the results DataFrame (second element returned by run_and_submit_all).
    results_table = gr.DataFrame(
        label="๐Ÿ“ Questions and Agent Answers", 
        wrap=True,
        height=400
    )

    # No `inputs` are wired: presumably Gradio supplies the `profile` argument
    # from the login state via its gr.OAuthProfile annotation - TODO confirm.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    # Startup banner.
    separator = "="*50
    print("\n" + separator)
    print("๐Ÿš€ ENHANCED GAIA AGENT STARTING")
    print(separator)

    # Report the Hugging Face Space environment variables, when present.
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if not space_host:
        print("โ„น๏ธ  Running locally - SPACE_HOST not found")
    else:
        print(f"โœ… SPACE_HOST: {space_host}")
        print(f"๐ŸŒ Runtime URL: https://{space_host}.hf.space")

    if not space_id:
        print("โ„น๏ธ  SPACE_ID not found")
    else:
        print(f"โœ… SPACE_ID: {space_id}")
        print(f"๐Ÿ“ Repo URL: https://huggingface.co/spaces/{space_id}")

    # Report which compute device torch can see.
    if not torch.cuda.is_available():
        print("๐Ÿ’ป Running on CPU (GPU not available)")
    else:
        gpu_name = torch.cuda.get_device_name()
        print(f"๐ŸŽฎ GPU Available: {gpu_name}")
        gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
        print(f"๐Ÿ’พ GPU Memory: {gpu_memory_gb:.1f} GB")

    print(separator)
    print("๐Ÿš€ Launching Enhanced GAIA Agent Interface...")
    demo.launch(debug=True, share=False)