rootxhacker committed on
Commit
e13333a
·
verified ·
1 Parent(s): cc41526

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +327 -0
app.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import gradio as gr
4
+ import os
5
+ from typing import Dict, List, Any, Optional
6
+ import json
7
+ from datetime import datetime
8
+ import traceback
9
+
10
+ # CrewAI and Julia Browser imports
11
+ from crewai import Agent, Task, Crew, Process
12
+ from crewai_tools import BaseTool
13
+ from crewai.llm import LLM
14
+ from julia_browser import AgentSDK
15
+
16
# Module-level Julia Browser SDK instance; every tool class in this file
# calls its methods through this one object.
browser = AgentSDK()
18
+
19
class OpenWebsiteTool(BaseTool):
    """CrewAI tool that opens a URL through the module-level ``browser``."""

    name: str = "open_website"
    description: str = "Open a website and get page content. Input: url (string)"

    def _run(self, url: str) -> str:
        """Open ``url`` and return a one-line summary containing the page title.

        Args:
            url: Address handed unchanged to ``browser.open_website``.

        Returns:
            ``"Opened: <title> at <url>"``. The title falls back to
            ``"Unknown"`` when the browser result carries no ``title`` key.
        """
        result = browser.open_website(url)
        # Same "Unknown" fallback that submit_form / follow_link use, so a
        # result without a title no longer raises KeyError mid-run.
        # NOTE(review): presumably a missing title means the load failed —
        # confirm how julia_browser reports errors.
        title = result.get('title', 'Unknown')
        return f"Opened: {title} at {url}"
26
+
27
class ListElementsTool(BaseTool):
    """CrewAI tool that lists the elements reported by ``browser.list_elements``."""

    name: str = "list_elements"
    description: str = "List all clickable elements and input fields on current page"

    def _run(self) -> str:
        """Return a summary line followed by one line per listed element.

        Returns:
            ``"Found <n> clickable, <m> inputs:"`` and then one
            ``"[id] type: text"`` line for each entry under ``"elements"``.
            Missing counters are reported as ``0`` and missing element text
            as ``"N/A"``.
        """
        elements = browser.list_elements()
        lines = [
            f"[{elem['id']}] {elem['type']}: {elem.get('text', 'N/A')}"
            for elem in elements.get("elements", [])
        ]
        # The element list was already read defensively; read the counters the
        # same way so a partial result does not raise KeyError.
        clickable = elements.get('total_clickable', 0)
        inputs = elements.get('total_inputs', 0)
        return f"Found {clickable} clickable, {inputs} inputs:\n" + "\n".join(lines)
37
+
38
class ClickElementTool(BaseTool):
    """CrewAI tool that clicks a numbered element via ``browser.click_element``."""

    name: str = "click_element"
    description: str = "Click a button or link by its number ID. Input: element_id (int)"

    def _run(self, element_id: int) -> str:
        """Click the element with the given number and report the outcome.

        Args:
            element_id: Number passed unchanged to ``browser.click_element``.

        Returns:
            ``"Clicked: <element> - <status>"``; either part falls back to
            ``"Unknown"`` / ``"unknown"`` when absent from the result.
        """
        result = browser.click_element(element_id)
        # 'element' was already optional; treat 'status' the same way instead
        # of raising KeyError when the browser omits it.
        element = result.get('element', 'Unknown')
        status = result.get('status', 'unknown')
        return f"Clicked: {element} - {status}"
45
+
46
class TypeTextTool(BaseTool):
    """CrewAI tool that types text into a numbered field via ``browser.type_text``."""

    name: str = "type_text"
    description: str = "Type text into an input field. Input: field_id (int), text (string)"

    def _run(self, field_id: int, text: str) -> str:
        """Type ``text`` into field ``field_id`` and report the outcome.

        Args:
            field_id: Field number passed unchanged to ``browser.type_text``.
            text: Text to enter.

        Returns:
            ``"Typed '<text>' into field <field_id> - <status>"``; the status
            is ``"unknown"`` when the result has no ``status`` key.
        """
        result = browser.type_text(field_id, text)
        # Tolerate a result without 'status' rather than raising KeyError.
        status = result.get('status', 'unknown')
        return f"Typed '{text}' into field {field_id} - {status}"
53
+
54
class SubmitFormTool(BaseTool):
    """CrewAI tool that submits the current form via ``browser.submit_form``."""

    name: str = "submit_form"
    description: str = "Submit the current form with typed data"

    def _run(self) -> str:
        """Submit the form and return the title of the resulting page.

        Returns:
            ``"Form submitted - New page: <title>"``, with ``"Unknown"`` when
            the result has no ``title`` key.
        """
        response = browser.submit_form()
        page_title = response.get('title', 'Unknown')
        return f"Form submitted - New page: {page_title}"
61
+
62
class GetPageInfoTool(BaseTool):
    """CrewAI tool that summarises the current page via ``browser.get_page_info``."""

    name: str = "get_page_info"
    description: str = "Get current page title, URL, and content"

    def _run(self) -> str:
        """Return the page title, URL and a preview of at most 300 characters.

        Returns:
            Three lines: ``Title: ...``, ``URL: ...`` and ``Content: ...``.
            Missing title/URL are shown as ``"Unknown"``; missing content is
            shown as an empty preview.
        """
        info = browser.get_page_info()
        content = info.get('content') or ""
        preview = content[:300]
        # Only mark the preview as truncated when something was cut off.
        if len(content) > 300:
            preview += "..."
        title = info.get('title', 'Unknown')
        url = info.get('url', 'Unknown')
        return f"Title: {title}\nURL: {url}\nContent: {preview}"
69
+
70
class ScrollDownTool(BaseTool):
    """CrewAI tool that scrolls the page down via ``browser.scroll_down``."""

    name: str = "scroll_down"
    description: str = "Scroll down the page. Input: chunks (int, default 1)"

    def _run(self, chunks: int = 1) -> str:
        """Scroll down by ``chunks`` and report the resulting position.

        Args:
            chunks: Amount passed unchanged to ``browser.scroll_down``.

        Returns:
            ``"Scrolled down <chunks> chunks - Position: <position>"``; the
            position is ``"unknown"`` when the result has no ``position`` key.
        """
        result = browser.scroll_down(chunks)
        # Tolerate a result without 'position' rather than raising KeyError.
        position = result.get('position', 'unknown')
        return f"Scrolled down {chunks} chunks - Position: {position}"
77
+
78
class ScrollUpTool(BaseTool):
    """CrewAI tool that scrolls the page up via ``browser.scroll_up``."""

    name: str = "scroll_up"
    description: str = "Scroll up the page. Input: chunks (int, default 1)"

    def _run(self, chunks: int = 1) -> str:
        """Scroll up by ``chunks`` and report the resulting position.

        Args:
            chunks: Amount passed unchanged to ``browser.scroll_up``.

        Returns:
            ``"Scrolled up <chunks> chunks - Position: <position>"``; the
            position is ``"unknown"`` when the result has no ``position`` key.
        """
        result = browser.scroll_up(chunks)
        # Tolerate a result without 'position' rather than raising KeyError.
        position = result.get('position', 'unknown')
        return f"Scrolled up {chunks} chunks - Position: {position}"
85
+
86
class SearchPageTool(BaseTool):
    """CrewAI tool that searches the current page via ``browser.search_page``."""

    name: str = "search_page"
    description: str = "Search for text within current page. Input: term (string)"

    def _run(self, term: str) -> str:
        """Search for ``term`` and report how many matches were found.

        Args:
            term: Text passed unchanged to ``browser.search_page``.

        Returns:
            ``"Found <n> matches for '<term>'"``, with ``0`` when the result
            has no ``matches`` key.
        """
        outcome = browser.search_page(term)
        match_count = outcome.get('matches', 0)
        return f"Found {match_count} matches for '{term}'"
93
+
94
class FollowLinkTool(BaseTool):
    """CrewAI tool that follows a numbered link via ``browser.follow_link``."""

    name: str = "follow_link"
    description: str = "Navigate to a link by its number. Input: link_id (int)"

    def _run(self, link_id: int) -> str:
        """Follow link ``link_id`` and report the title of the page reached.

        Args:
            link_id: Link number passed unchanged to ``browser.follow_link``.

        Returns:
            ``"Followed link <link_id> - Now at: <title>"``, with ``"Unknown"``
            when the result has no ``title`` key.
        """
        landing = browser.follow_link(link_id)
        landing_title = landing.get('title', 'Unknown')
        return f"Followed link {link_id} - Now at: {landing_title}"
101
+
102
# One instance of each browser tool, in the order they are handed to the agent
browser_tools = [
    tool_cls()
    for tool_cls in (
        OpenWebsiteTool,
        ListElementsTool,
        ClickElementTool,
        TypeTextTool,
        SubmitFormTool,
        GetPageInfoTool,
        ScrollDownTool,
        ScrollUpTool,
        SearchPageTool,
        FollowLinkTool,
    )
]
115
+
116
class WebAutomationAgent:
    """Bundle one CrewAI agent with the browser tools and a Groq-hosted LLM."""

    def __init__(self):
        """Create the LLM client and the agent reused for every task."""
        # Configure Groq LLM; api_key is None when GROQ_API_KEY is not set.
        self.llm = LLM(
            model="groq/qwen2.5-32b-instruct",
            api_key=os.getenv("GROQ_API_KEY"),
        )

        # Create web automation agent
        self.agent = Agent(
            role="Web Automation Expert",
            goal="Execute web tasks using browser tools based on user instructions",
            backstory="""You are a skilled web automation expert who can navigate websites,
            interact with elements, fill forms, and extract information. You break down
            complex tasks into simple browser actions and execute them step by step.""",
            tools=browser_tools,
            llm=self.llm,
            verbose=True,
            allow_delegation=False,
        )

    def execute_task(self, instruction: str) -> str:
        """Run ``instruction`` as a single-task crew and return its report.

        Args:
            instruction: The user's request, embedded verbatim in the task
                description given to the agent.

        Returns:
            ``str()`` of the crew result, or ``"Error: <message>"`` followed
            by the formatted traceback when ``crew.kickoff()`` raises.
        """
        # The tool list in the prompt mirrors browser_tools; scroll_up was
        # registered as a tool but previously missing from this listing.
        task = Task(
            description=f"""
            Task: {instruction}

            Use the available browser tools to complete this task:
            - open_website(url) - Open websites
            - list_elements() - See what's clickable on page
            - click_element(id) - Click buttons/links
            - type_text(field_id, text) - Fill input fields
            - submit_form() - Submit forms
            - get_page_info() - Get page details
            - scroll_down(chunks) - Scroll to see more
            - scroll_up(chunks) - Scroll back up
            - search_page(term) - Find text on page
            - follow_link(id) - Navigate to links

            Work step by step and explain what you're doing.
            """,
            agent=self.agent,
            expected_output="Complete step-by-step execution report with results",
        )

        crew = Crew(
            agents=[self.agent],
            tasks=[task],
            process=Process.sequential,
            verbose=True,
        )

        try:
            result = crew.kickoff()
            return str(result)
        except Exception as e:
            # Top-level boundary: hand the failure back as text so the chat
            # UI can display it instead of the request dying.
            return f"Error: {str(e)}\n{traceback.format_exc()}"


# Initialize agent (constructed once at import time and shared by all requests)
automation_agent = WebAutomationAgent()
175
+
176
def execute_user_task(message: Optional[str], history: Optional[List[List[str]]]) -> tuple:
    """Run the user's message through the agent and record the exchange.

    Args:
        message: Text from the input box. ``None``, empty and
            whitespace-only values are ignored.
        history: Chat log as ``[user_text, agent_text]`` pairs. ``None`` is
            treated as an empty log; an existing list is extended in place.

    Returns:
        ``(history, "")`` — the updated chat log and an empty string that
        clears the input box.
    """
    if history is None:
        history = []

    if not message or not message.strip():
        return history, ""

    try:
        reply = automation_agent.execute_task(message)
    except Exception as e:
        reply = f"❌ Error: {str(e)}"

    # The row is appended once the outcome is known: this handler returns a
    # single value, so an interim placeholder row would be overwritten before
    # it was ever returned.
    history.append([message, reply])
    return history, ""
193
+
194
def clear_history():
    """Return reset values for the chat log (empty list) and input box (empty string)."""
    fresh_chat, blank_input = [], ""
    return fresh_chat, blank_input
196
+
197
# Example instructions; each one is rendered as a button in the UI section
sample_tasks = [
    "Open google.com and search for 'web automation'",
    "Go to example.com and list all elements",
    "Navigate to a news website and find the main headline",
    "Open a shopping site and look for search functionality",
    "Visit github.com and find the login button",
]
205
+
206
def load_sample(task_text):
    """Return ``task_text`` unchanged."""
    selected = task_text
    return selected
208
+
209
# Create Gradio Interface
# NOTE(review): the block nesting below was reconstructed from a view of this
# file that had lost its indentation — confirm the layout against the app.
with gr.Blocks(title="AI Web Agent", theme=gr.themes.Soft()) as demo:

    gr.HTML("""
    <div style="text-align: center; margin: 20px;">
        <h1>🤖 AI Web Automation Agent</h1>
        <p><strong>Julia Browser + CrewAI + Groq (Qwen-32B)</strong></p>
        <p>Give me web tasks in plain English!</p>
    </div>
    """)

    # Main chat interface - centered
    chatbot = gr.Chatbot(
        label="Agent Execution",
        height=600,
        show_copy_button=True,
    )

    # Centered input section: empty side columns act as spacers
    with gr.Row(elem_id="input-row"):
        with gr.Column(scale=1):
            pass  # Left spacer
        with gr.Column(scale=3):
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="Tell me what to do on the web...",
                    container=False,
                    scale=4,
                    elem_id="main-input",
                )
                send_btn = gr.Button("🚀 Execute", variant="primary", scale=1)

            clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
        with gr.Column(scale=1):
            pass  # Right spacer

    # Sample tasks section
    with gr.Row():
        with gr.Column(scale=1):
            pass  # Left spacer
        with gr.Column(scale=2):
            gr.HTML("<h3 style='text-align: center;'>📋 Sample Tasks</h3>")

            for i, task in enumerate(sample_tasks):
                # "outline" is not a documented Button variant; use the
                # documented "secondary" style for these low-emphasis buttons.
                sample_btn = gr.Button(
                    f"Sample {i+1}: {task[:30]}...",
                    variant="secondary",
                    size="sm",
                )
                # Bind the current task as a default argument so each button
                # keeps its own text instead of the loop's final value.
                sample_btn.click(
                    lambda t=task: t,
                    outputs=user_input,
                )

            with gr.Row():
                with gr.Column():
                    gr.HTML("""
                    <div style="padding: 15px; background: #f8f9fa; border-radius: 8px;">
                        <h4>💡 Tips:</h4>
                        <ul style="font-size: 12px;">
                            <li>Specify the website URL</li>
                            <li>Describe what to click/type</li>
                            <li>Ask for information extraction</li>
                            <li>Request form interactions</li>
                        </ul>
                    </div>
                    """)

                with gr.Column():
                    gr.HTML("""
                    <div style="padding: 15px; background: #e3f2fd; border-radius: 8px;">
                        <h4>⚙️ Setup:</h4>
                        <p style="font-size: 12px;">
                            Set GROQ_API_KEY:<br>
                            <code>export GROQ_API_KEY="gsk_..."</code>
                        </p>
                    </div>
                    """)
        with gr.Column(scale=1):
            pass  # Right spacer

    # Event handlers: the button and the Enter key both run the task and then
    # clear the input box via the second output.
    send_btn.click(
        execute_user_task,
        inputs=[user_input, chatbot],
        outputs=[chatbot, user_input],
    )

    user_input.submit(
        execute_user_task,
        inputs=[user_input, chatbot],
        outputs=[chatbot, user_input],
    )

    clear_btn.click(
        clear_history,
        outputs=[chatbot, user_input],
    )
307
+
308
if __name__ == "__main__":
    # Check for API key; only a warning is printed and startup continues.
    if not os.getenv("GROQ_API_KEY"):
        print("⚠️ Warning: GROQ_API_KEY not found in environment variables")
        print("Set it with: export GROQ_API_KEY='your_api_key_here'")

    print("🚀 Starting AI Web Automation Agent...")
    print("📝 Available browser tools:")
    for tool in browser_tools:
        print(f" - {tool.name}: {tool.description}")

    # For Hugging Face Spaces, use environment port if available
    port = int(os.getenv("PORT", 7860))

    # Bind on all interfaces so the app is reachable from outside a container.
    demo.launch(
        server_name="0.0.0.0",
        server_port=port,
        share=False,
        show_error=True,
    )