File size: 14,447 Bytes
580bcf5
e51386e
580bcf5
0fda38b
e51386e
f1c2e53
 
 
 
 
e51386e
0fda38b
 
72146a4
f1c2e53
 
bbe4b6b
 
 
 
 
f1c2e53
bbe4b6b
 
 
 
 
f1c2e53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fda38b
72146a4
 
 
 
 
 
f1c2e53
72146a4
f1c2e53
0fda38b
 
e51386e
8ac5ef4
f1c2e53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e51386e
0fda38b
e51386e
 
72146a4
f1c2e53
e51386e
f1c2e53
 
 
72146a4
f1c2e53
 
 
 
 
 
 
 
 
 
 
 
e51386e
f1c2e53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2828102
f1c2e53
 
0fda38b
580bcf5
 
 
72146a4
 
 
 
e51386e
 
 
 
8ac5ef4
580bcf5
0fda38b
e51386e
0fda38b
 
72146a4
f1c2e53
 
c2e1cfe
72146a4
 
c2e1cfe
 
 
 
72146a4
c2e1cfe
 
 
 
 
 
 
 
 
 
 
 
72146a4
c2e1cfe
72146a4
c2e1cfe
 
 
e51386e
0fda38b
e51386e
580bcf5
 
e51386e
f1c2e53
580bcf5
e51386e
8ac5ef4
f1c2e53
 
 
0fda38b
580bcf5
 
 
e51386e
 
8ac5ef4
f1c2e53
8ac5ef4
580bcf5
e51386e
f1c2e53
580bcf5
0fda38b
580bcf5
 
 
8ac5ef4
 
e51386e
f1c2e53
 
 
580bcf5
e51386e
580bcf5
f1c2e53
 
580bcf5
 
e51386e
f1c2e53
580bcf5
e51386e
580bcf5
f1c2e53
 
 
 
0fda38b
580bcf5
 
 
e51386e
 
580bcf5
 
e51386e
 
f1c2e53
 
e51386e
f1c2e53
e51386e
 
 
580bcf5
8ac5ef4
580bcf5
 
e51386e
f1c2e53
 
 
580bcf5
f1c2e53
 
 
0fda38b
580bcf5
 
 
 
 
8ac5ef4
580bcf5
8ac5ef4
f1c2e53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fda38b
e51386e
72146a4
e51386e
0fda38b
 
 
 
 
72146a4
0fda38b
e51386e
580bcf5
 
e51386e
580bcf5
 
 
 
 
 
 
 
 
 
e51386e
580bcf5
 
 
 
 
 
e51386e
580bcf5
 
e51386e
580bcf5
 
e51386e
 
 
 
580bcf5
e51386e
580bcf5
 
0fda38b
e51386e
0fda38b
 
 
 
e51386e
580bcf5
 
e51386e
 
580bcf5
e51386e
580bcf5
e51386e
 
 
 
f1c2e53
 
 
 
 
 
 
 
 
8ac5ef4
580bcf5
f1c2e53
 
 
 
bbe4b6b
 
f1c2e53
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
import os
import sys
import json
import traceback
from typing import List, Dict
import warnings

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

import gradio as gr

# --- Environment variable setup to fix permission issues ---
def setup_environment():
    """Point library cache/data env vars at writable /tmp paths and create them.

    Must run before nltk/matplotlib/huggingface/torch are imported so those
    libraries never try to write to read-only default locations.  If a
    preferred path is not writable, falls back to ``/tmp/<varname>``.
    """
    env_vars = {
        "NLTK_DATA": "/tmp/nltk_data",
        "MPLCONFIGDIR": "/tmp/matplotlib_cache",
        "HF_HOME": "/tmp/huggingface_cache",
        "TORCH_HOME": "/tmp/torch_cache",
        "TRANSFORMERS_CACHE": "/tmp/huggingface_cache"
    }

    # Single guarded pass.  (A previous unguarded duplicate loop ran first
    # and would crash on PermissionError before this fallback logic.)
    for var, path in env_vars.items():
        os.environ[var] = path
        # Create directory if it doesn't exist
        try:
            os.makedirs(path, exist_ok=True)
            print(f"βœ… Created/verified directory: {path}")
        except PermissionError:
            print(f"⚠️ Permission denied for {path}, using /tmp fallback")
            fallback_path = f"/tmp/{var.lower()}"
            os.environ[var] = fallback_path
            os.makedirs(fallback_path, exist_ok=True)
        except Exception as e:
            print(f"❌ Error setting up {var}: {e}")

# Setup environment first
setup_environment()

# Import nltk AFTER setting environment variables
# (nltk reads NLTK_DATA at import time, so ordering matters here)
try:
    import nltk
    # Download required NLTK data upfront
    # quiet=True keeps the downloader from spamming the startup log
    nltk.download('punkt', download_dir=os.environ["NLTK_DATA"], quiet=True)
    nltk.download('stopwords', download_dir=os.environ["NLTK_DATA"], quiet=True)
    print("βœ… NLTK data downloaded successfully")
except Exception as e:
    # Non-fatal: the app can still run; tools needing NLTK may degrade
    print(f"⚠️ NLTK setup warning: {e}")

# Add current directory to path for local imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import dependencies with better error handling
try:
    from utils.gaia_api import GaiaAPI
    print("βœ… GaiaAPI imported successfully")
except ImportError as e:
    print(f"⚠️ Failed to import GaiaAPI: {e}")
    # Create a fallback GaiaAPI
    # Stub with the same three classmethods so the UI keeps working
    # (returns canned data / an error result instead of real API calls).
    class GaiaAPI:
        @classmethod
        def get_questions(cls):
            return [{"task_id": "fallback", "question": "What is 2+2?"}]
        @classmethod 
        def get_random_question(cls):
            return {"task_id": "fallback", "question": "What is 2+2?"}
        @classmethod
        def submit_answers(cls, username, code_url, answers):
            return {"error": "GaiaAPI not available", "score": 0}

# Initialize global agent state (mutated by initialize_agent() below)
AGENT_READY = False           # True once an agent (real or mock) is usable
agent = None                  # ReActAgent instance, or the mock LLM, or None
initialization_error = None   # human-readable failure reason, if any
agent_info = {}               # diagnostics shown in the System Status tab

def initialize_agent():
    """Initialize the LlamaIndex agent with comprehensive error handling.

    Populates the module globals: sets ``agent`` and ``AGENT_READY`` on
    success, or ``initialization_error`` on failure.  Progress/diagnostic
    details are recorded in ``agent_info`` for the status tab.  Never raises.
    """
    global agent, AGENT_READY, initialization_error, agent_info
    
    try:
        print("πŸ”„ Starting agent initialization...")
        
        # Import agent-related modules
        # (imported lazily here so a missing dependency is reported in the
        # UI instead of crashing the whole app at module import time)
        print("πŸ“¦ Importing modules...")
        from agent.local_llm import LocalLLM
        from agent.tools import gaia_tools
        from llama_index.core.agent import ReActAgent
        from llama_index.core.memory import ChatMemoryBuffer
        
        agent_info["modules_imported"] = True
        print("βœ… All modules imported successfully!")

        print("πŸ€– Initializing Local LLM...")
        local_llm = LocalLLM()
        llm = local_llm.get_llm()
        agent_info["llm_type"] = llm.__class__.__name__
        
        print("🧠 Creating ReAct Agent...")
        memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
        
        # Check if we have a proper LLM or mock
        if hasattr(llm, 'chat') and llm.__class__.__name__ != 'MockLLM':
            agent = ReActAgent.from_tools(
                tools=gaia_tools,
                llm=llm,
                memory=memory,
                verbose=True,
                max_iterations=3
            )
            agent_info["agent_type"] = "ReActAgent"
            print("βœ… ReAct Agent initialized successfully!")
        else:
            agent = llm  # Use the mock LLM directly
            agent_info["agent_type"] = "MockLLM"
            print("⚠️ Using mock mode - agent partially ready")
        
        # gaia_tools may be absent from locals() if its import failed above
        agent_info["tools_count"] = len(gaia_tools) if 'gaia_tools' in locals() else 0
        AGENT_READY = True
        print("πŸŽ‰ Agent initialization complete!")
        
    except Exception as e:
        # Record the failure so the UI can display it; leave app running
        error_msg = f"Failed to initialize agent: {str(e)}"
        print(f"❌ {error_msg}")
        traceback.print_exc()
        AGENT_READY = False
        agent = None
        initialization_error = error_msg
        agent_info["error"] = error_msg

# Initialize agent
initialize_agent()

def process_single_question(question_text: str) -> str:
    """Process a single GAIA question through the agent.

    Args:
        question_text: The raw question text to answer.

    Returns:
        The cleaned answer string, or a human-readable "❌ ..." message
        when the agent is unavailable or processing fails.  Never raises.
    """
    if not AGENT_READY:
        error_msg = "❌ Agent not ready. "
        if initialization_error:
            error_msg += f"Error: {initialization_error}"
        return error_msg

    if not question_text.strip():
        return "❌ Please enter a question."

    try:
        enhanced_prompt = f"""
Answer the following question directly and concisely. Do not include "FINAL ANSWER" or any other prefixes in your response. Just provide the answer.

Question: {question_text}
"""
        
        print(f"πŸ€” Processing question: {question_text[:50]}...")
        
        # Dispatch by capability: query() for ReActAgent, then complete()
        # for models without chat templates, then chat() with a complete()
        # fallback.  Every branch assigns `answer` explicitly (the previous
        # version relied on a fragile `'answer' not in locals()` check).
        if hasattr(agent, 'query'):
            response = agent.query(enhanced_prompt)
            answer = str(response).strip()
        elif hasattr(agent, 'complete'):
            # Use complete() method for models without chat templates
            response = agent.complete(enhanced_prompt)
            answer = response.text if hasattr(response, 'text') else str(response)
        elif hasattr(agent, 'chat'):
            # Only use chat if it's the MockLLM or a proper chat model
            try:
                response = agent.chat([{"role": "user", "content": enhanced_prompt}])
                answer = response.message.content if hasattr(response, 'message') else str(response)
            except Exception as chat_error:
                # Fallback to complete if chat fails
                print(f"⚠️ Chat method failed, trying complete: {chat_error}")
                if hasattr(agent, 'complete'):
                    response = agent.complete(enhanced_prompt)
                    answer = response.text if hasattr(response, 'text') else str(response)
                else:
                    raise chat_error
        else:
            answer = "Mock response: I would analyze this question and provide an answer."

        answer = str(answer).strip()

        # Remove common prefixes from the answer
        for prefix in ["FINAL ANSWER:", "Answer:", "The answer is:", "Final answer:"]:
            if answer.startswith(prefix):
                answer = answer[len(prefix):].strip()

        print(f"βœ… Generated answer: {answer[:50]}...")
        return answer

    except Exception as e:
        error_msg = f"❌ Error processing question: {str(e)}"
        print(error_msg)
        return error_msg

def process_all_questions() -> str:
    """Run the agent over every GAIA question and save answers to disk.

    Answers are written to /app/gaia_answers.json in the submission format
    ({"task_id", "submitted_answer"}).  Returns a human-readable summary,
    or an "❌ ..." message on failure.  Never raises.
    """
    if not AGENT_READY:
        return "❌ Agent not ready."

    try:
        print("πŸ“₯ Fetching all GAIA questions...")
        questions = GaiaAPI.get_questions()
        total = len(questions)

        print(f"πŸ”„ Processing {total} questions...")
        processed_answers = []
        for idx, item in enumerate(questions, start=1):
            print(f"Processing question {idx}/{total}: {item['task_id']}")
            processed_answers.append({
                "task_id": item['task_id'],
                "submitted_answer": process_single_question(item['question']),
            })

        # Persist for the separate "Submit to GAIA" step
        output_file = "/app/gaia_answers.json"
        with open(output_file, "w") as f:
            json.dump(processed_answers, f, indent=2)

        parts = [
            f"βœ… Processed {len(processed_answers)} questions.\n",
            f"πŸ’Ύ Answers saved to {output_file}\n",
            "πŸ“‹ First 3 answers:\n",
        ]
        parts.extend(
            f"- {entry['task_id']}: {entry['submitted_answer'][:50]}...\n"
            for entry in processed_answers[:3]
        )
        summary = "".join(parts)

        print(summary)
        return summary

    except Exception as e:
        error_msg = f"❌ Error processing questions: {str(e)}"
        print(error_msg)
        traceback.print_exc()
        return error_msg

def submit_to_gaia(username: str, code_url: str) -> str:
    """Submit previously saved answers to the GAIA benchmark.

    Args:
        username: Hugging Face username for attribution.
        code_url: Public URL of the Space's source code.

    Returns:
        A success message with the score, or an "❌ ..." message.
    """
    if not AGENT_READY:
        return "❌ Agent not ready."

    if not username or not code_url:
        return "❌ Please provide both username and code URL."

    # Answers must have been produced by process_all_questions() first
    answers_file = "/app/gaia_answers.json"
    try:
        with open(answers_file, "r") as f:
            answers = json.load(f)
        print(f"πŸ“€ Submitting {len(answers)} answers...")
    except FileNotFoundError:
        return "❌ No processed answers found. Please process them first."

    try:
        result = GaiaAPI.submit_answers(username, code_url, answers)
        if "error" in result:
            return f"❌ Submission failed: {result['error']}"
        success_msg = f"βœ… Submission successful!\nπŸ“Š Score: {result.get('score', 'Unknown')}"
        print(success_msg)
        return success_msg
    except Exception as e:
        error_msg = f"❌ Submission error: {str(e)}"
        print(error_msg)
        return error_msg

def get_sample_question() -> str:
    """Fetch one random GAIA question's text for the single-question tab."""
    try:
        return GaiaAPI.get_random_question()['question']
    except Exception as e:
        return f"Error loading sample question: {str(e)}"

def get_system_status() -> str:
    """Get detailed system status for debugging.

    Reports agent readiness, LLM/agent metadata from ``agent_info``, the
    cache environment variables, and directory writability.  Returns a
    multi-line string; never raises.
    """
    status = "πŸ” System Status:\n\n"
    
    # Agent status
    status += f"πŸ€– Agent Ready: {'βœ… Yes' if AGENT_READY else '❌ No'}\n"
    if initialization_error:
        status += f"❌ Error: {initialization_error}\n"
    
    # Agent info (populated during initialize_agent)
    status += f"🧠 LLM Type: {agent_info.get('llm_type', 'Unknown')}\n"
    status += f"πŸ”§ Agent Type: {agent_info.get('agent_type', 'Unknown')}\n"
    status += f"πŸ› οΈ Tools Count: {agent_info.get('tools_count', 0)}\n"
    
    # Environment
    status += "\nπŸ“ Environment Variables:\n"
    for var in ["NLTK_DATA", "HF_HOME", "MPLCONFIGDIR", "TORCH_HOME"]:
        path = os.environ.get(var, 'Not set')
        exists = "βœ…" if os.path.exists(path) else "❌"
        status += f"  {var}: {path} {exists}\n"
    
    # Directory permissions
    status += "\nπŸ“‚ Directory Status:\n"
    for path in ["/app", "/tmp"]:
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit; os.access failures are OSError.
        try:
            writable = os.access(path, os.W_OK)
            status += f"  {path}: {'βœ… Writable' if writable else '❌ Not writable'}\n"
        except OSError:
            status += f"  {path}: ❌ Error checking\n"
    
    return status

# ---------- Gradio UI ----------
# Four tabs: single-question test, batch evaluation, submission, and a
# debugging/status view.  Status shown in the header reflects module-load
# time (initialize_agent() has already run by this point).
with gr.Blocks(title="πŸ¦™ GAIA LlamaIndex Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"""
# πŸ¦™ GAIA Benchmark Agent with LlamaIndex

This agent uses LlamaIndex with a local LLM to tackle GAIA benchmark questions.

**Status:** {"βœ… Ready" if AGENT_READY else "❌ Not Ready"}
{f"**Error:** {initialization_error}" if initialization_error else ""}
""")

    # Tab 1: try the agent on one question at a time
    with gr.Tab("πŸ”¬ Test Single Question"):
        gr.Markdown("Test the agent with individual questions")

        with gr.Row():
            with gr.Column():
                question_input = gr.Textbox(
                    label="Question",
                    placeholder="Enter a GAIA question or click 'Load Sample'",
                    lines=3
                )
                with gr.Row():
                    sample_btn = gr.Button("🎲 Load Sample Question")
                    process_btn = gr.Button("πŸš€ Process Question", variant="primary")

            with gr.Column():
                answer_output = gr.Textbox(
                    label="Agent Answer",
                    lines=5,
                    interactive=False
                )

        sample_btn.click(get_sample_question, outputs=question_input)
        process_btn.click(process_single_question, inputs=question_input, outputs=answer_output)

    # Tab 2: run the whole benchmark and save answers to disk
    with gr.Tab("πŸ“Š Full Evaluation"):
        gr.Markdown("Process all GAIA questions and prepare for submission")

        process_all_btn = gr.Button("πŸ”„ Process All Questions", variant="primary")
        processing_output = gr.Textbox(label="Processing Status", lines=10, interactive=False)

        process_all_btn.click(process_all_questions, outputs=processing_output)

    # Tab 3: submit the saved answers for official scoring
    with gr.Tab("πŸ† Submit to GAIA"):
        gr.Markdown("""
Submit your processed answers to the GAIA benchmark for official scoring.

**Requirements:**
1. Your Hugging Face username
2. Link to your Space code (e.g., https://huggingface.co/spaces/your-username/gaia-agent)
""")

        with gr.Row():
            with gr.Column():
                username_input = gr.Textbox(label="HF Username", placeholder="your-username")
                code_url_input = gr.Textbox(label="Space Code URL", placeholder="https://huggingface.co/spaces/your-username/gaia-agent")
                submit_btn = gr.Button("🎯 Submit to GAIA", variant="primary")

            with gr.Column():
                submission_output = gr.Textbox(label="Submission Result", lines=5, interactive=False)

        submit_btn.click(submit_to_gaia, inputs=[username_input, code_url_input], outputs=submission_output)

    # Tab 4: live diagnostics for debugging permission/initialization issues
    with gr.Tab("ℹ️ System Status"):
        gr.Markdown("## System Information and Debugging")
        
        refresh_btn = gr.Button("πŸ”„ Refresh Status")
        status_output = gr.Textbox(label="System Status", lines=20, interactive=False)
        
        # Load initial status
        demo.load(get_system_status, outputs=status_output)
        refresh_btn.click(get_system_status, outputs=status_output)

if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — the standard HF Spaces setup.
    print("πŸš€ Starting Gradio interface...")
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)