Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3 | |
import gradio as gr | |
import os | |
from typing import Dict, List, Any, Optional | |
import json | |
from datetime import datetime | |
import traceback | |
# CrewAI and Julia Browser imports | |
from crewai import Agent, Task, Crew, Process | |
from crewai_tools import BaseTool | |
from crewai.llm import LLM | |
from julia_browser import AgentSDK | |
# Initialize browser
# A single module-level AgentSDK instance is created at import time; every
# tool class in this file calls its methods on this one object.
browser = AgentSDK()
class OpenWebsiteTool(BaseTool):
    """CrewAI tool that loads a URL through the module-level ``browser``."""

    name: str = "open_website"
    description: str = "Open a website and get page content. Input: url (string)"

    def _run(self, url: str) -> str:
        """Open ``url`` and return a one-line confirmation.

        Args:
            url: Address passed unchanged to ``browser.open_website``.

        Returns:
            ``"Opened: <title> at <url>"``. The title falls back to
            ``"Unknown"`` when the browser result carries no ``title`` key,
            the same fallback the submit/follow-link tools use.
        """
        result = browser.open_website(url)
        # .get() rather than ['title']: a result without a title should not
        # raise KeyError in the middle of an agent run.
        return f"Opened: {result.get('title', 'Unknown')} at {url}"
class ListElementsTool(BaseTool):
    """CrewAI tool that enumerates the interactive elements of the current page."""

    name: str = "list_elements"
    description: str = "List all clickable elements and input fields on current page"

    def _run(self) -> str:
        """Return a summary line followed by one line per element.

        Returns:
            ``"Found <n> clickable, <m> inputs:"`` followed by lines of the
            form ``"[<id>] <type>: <text>"``. Missing keys in the browser
            result are replaced by neutral placeholders instead of raising.
        """
        elements = browser.list_elements()
        # Every key is read with .get(): the original already treated
        # "elements" and "text" as optional, so the remaining keys are
        # handled the same way rather than raising KeyError.
        lines = [
            f"[{elem.get('id', '?')}] {elem.get('type', 'unknown')}: {elem.get('text', 'N/A')}"
            for elem in elements.get("elements", [])
        ]
        total_clickable = elements.get('total_clickable', 0)
        total_inputs = elements.get('total_inputs', 0)
        header = f"Found {total_clickable} clickable, {total_inputs} inputs:\n"
        return header + "\n".join(lines)
class ClickElementTool(BaseTool):
    """CrewAI tool that clicks an element identified by its numeric ID."""

    name: str = "click_element"
    description: str = "Click a button or link by its number ID. Input: element_id (int)"

    def _run(self, element_id: int) -> str:
        """Click the element and report what was clicked.

        Args:
            element_id: Number passed unchanged to ``browser.click_element``.

        Returns:
            ``"Clicked: <element> - <status>"``; both parts fall back to a
            placeholder when absent from the browser result.
        """
        result = browser.click_element(element_id)
        # 'status' is read defensively, like 'element' already was.
        element = result.get('element', 'Unknown')
        status = result.get('status', 'unknown')
        return f"Clicked: {element} - {status}"
class TypeTextTool(BaseTool):
    """CrewAI tool that types text into an input field of the current page."""

    name: str = "type_text"
    description: str = "Type text into an input field. Input: field_id (int), text (string)"

    def _run(self, field_id: int, text: str) -> str:
        """Type ``text`` into field ``field_id`` and report the outcome.

        Args:
            field_id: Field number passed to ``browser.type_text``.
            text: Text passed to ``browser.type_text``.

        Returns:
            ``"Typed '<text>' into field <field_id> - <status>"``; the status
            falls back to ``"unknown"`` when the browser result has none.
        """
        result = browser.type_text(field_id, text)
        # .get() avoids a KeyError when the result carries no 'status'.
        status = result.get('status', 'unknown')
        return f"Typed '{text}' into field {field_id} - {status}"
class SubmitFormTool(BaseTool):
    """CrewAI tool that submits the form on the current page."""

    name: str = "submit_form"
    description: str = "Submit the current form with typed data"

    def _run(self) -> str:
        """Submit via ``browser.submit_form`` and report the resulting page title."""
        response = browser.submit_form()
        # The title is optional in the response; "Unknown" is the fallback.
        new_title = response.get('title', 'Unknown')
        return f"Form submitted - New page: {new_title}"
class GetPageInfoTool(BaseTool):
    """CrewAI tool that reports title, URL and a content preview of the current page."""

    name: str = "get_page_info"
    description: str = "Get current page title, URL, and content"

    def _run(self) -> str:
        """Return a three-line summary of the current page.

        Returns:
            ``"Title: ...\\nURL: ...\\nContent: ..."`` where the content is cut
            to its first 300 characters. The trailing ``"..."`` is added only
            when the content was actually truncated, and missing keys are
            replaced by placeholders instead of raising.
        """
        preview_limit = 300
        info = browser.get_page_info()
        # `or ''` also covers a present-but-None content value.
        content = info.get('content') or ''
        preview = content[:preview_limit]
        if len(content) > preview_limit:
            preview += "..."
        title = info.get('title', 'Unknown')
        url = info.get('url', 'Unknown')
        return f"Title: {title}\nURL: {url}\nContent: {preview}"
class ScrollDownTool(BaseTool):
    """CrewAI tool that scrolls the current page down."""

    name: str = "scroll_down"
    description: str = "Scroll down the page. Input: chunks (int, default 1)"

    def _run(self, chunks: int = 1) -> str:
        """Scroll down by ``chunks`` and report the new position.

        Args:
            chunks: Amount passed unchanged to ``browser.scroll_down``.

        Returns:
            ``"Scrolled down <chunks> chunks - Position: <position>"``; the
            position falls back to ``"unknown"`` when the result has none.
        """
        result = browser.scroll_down(chunks)
        # .get() avoids a KeyError when the result carries no 'position'.
        position = result.get('position', 'unknown')
        return f"Scrolled down {chunks} chunks - Position: {position}"
class ScrollUpTool(BaseTool):
    """CrewAI tool that scrolls the current page up."""

    name: str = "scroll_up"
    description: str = "Scroll up the page. Input: chunks (int, default 1)"

    def _run(self, chunks: int = 1) -> str:
        """Scroll up by ``chunks`` and report the new position.

        Args:
            chunks: Amount passed unchanged to ``browser.scroll_up``.

        Returns:
            ``"Scrolled up <chunks> chunks - Position: <position>"``; the
            position falls back to ``"unknown"`` when the result has none.
        """
        result = browser.scroll_up(chunks)
        # .get() avoids a KeyError when the result carries no 'position'.
        position = result.get('position', 'unknown')
        return f"Scrolled up {chunks} chunks - Position: {position}"
class SearchPageTool(BaseTool):
    """CrewAI tool that searches the current page for a text term."""

    name: str = "search_page"
    description: str = "Search for text within current page. Input: term (string)"

    def _run(self, term: str) -> str:
        """Search for ``term`` via ``browser.search_page`` and report the match count."""
        outcome = browser.search_page(term)
        # A result without a 'matches' entry is reported as zero matches.
        match_count = outcome.get('matches', 0)
        return f"Found {match_count} matches for '{term}'"
class FollowLinkTool(BaseTool):
    """CrewAI tool that navigates to a link identified by its number."""

    name: str = "follow_link"
    description: str = "Navigate to a link by its number. Input: link_id (int)"

    def _run(self, link_id: int) -> str:
        """Follow link ``link_id`` via ``browser.follow_link`` and report the new page title."""
        destination = browser.follow_link(link_id)
        # The title is optional in the response; "Unknown" is the fallback.
        landed_on = destination.get('title', 'Unknown')
        return f"Followed link {link_id} - Now at: {landed_on}"
# Initialize all tools
# One instance of each tool class defined in this file. The list is passed to
# the CrewAI Agent as ``tools=`` in WebAutomationAgent.__init__ and is iterated
# in the ``__main__`` block to print each tool's name and description.
browser_tools = [
    OpenWebsiteTool(),
    ListElementsTool(),
    ClickElementTool(),
    TypeTextTool(),
    SubmitFormTool(),
    GetPageInfoTool(),
    ScrollDownTool(),
    ScrollUpTool(),
    SearchPageTool(),
    FollowLinkTool()
]
class WebAutomationAgent:
    """Wraps a single CrewAI agent that drives the browser tools.

    The LLM and the agent are built once in ``__init__``; each call to
    ``execute_task`` creates a fresh ``Task`` and ``Crew`` around that agent.
    """

    def __init__(self):
        """Create the Groq-backed LLM and the agent that uses ``browser_tools``."""
        # Configure Groq LLM (the API key is read from the environment and
        # may be None if GROQ_API_KEY is unset).
        self.llm = LLM(
            model="groq/qwen2.5-32b-instruct",
            api_key=os.getenv("GROQ_API_KEY")
        )
        # Create web automation agent
        self.agent = Agent(
            role="Web Automation Expert",
            goal="Execute web tasks using browser tools based on user instructions",
            backstory="""You are a skilled web automation expert who can navigate websites,
            interact with elements, fill forms, and extract information. You break down
            complex tasks into simple browser actions and execute them step by step.""",
            tools=browser_tools,
            llm=self.llm,
            verbose=True,
            allow_delegation=False
        )

    def execute_task(self, instruction: str) -> str:
        """Execute user task.

        Args:
            instruction: Plain-text task; it is embedded verbatim in the task
                description given to the agent.

        Returns:
            ``str()`` of the crew's result, or ``"Error: <message>"`` followed
            by the traceback when ``crew.kickoff()`` raises.
        """
        # The tool list in the prompt mirrors browser_tools; scroll_up is
        # listed so the prompt covers every registered tool.
        task = Task(
            description=f"""
        Task: {instruction}
        Use the available browser tools to complete this task:
        - open_website(url) - Open websites
        - list_elements() - See what's clickable on page
        - click_element(id) - Click buttons/links
        - type_text(field_id, text) - Fill input fields
        - submit_form() - Submit forms
        - get_page_info() - Get page details
        - scroll_down(chunks) - Scroll to see more
        - scroll_up(chunks) - Scroll back up
        - search_page(term) - Find text on page
        - follow_link(id) - Navigate to links
        Work step by step and explain what you're doing.
        """,
            agent=self.agent,
            expected_output="Complete step-by-step execution report with results"
        )
        crew = Crew(
            agents=[self.agent],
            tasks=[task],
            process=Process.sequential,
            verbose=True
        )
        try:
            result = crew.kickoff()
            return str(result)
        except Exception as e:
            # Top-level boundary: the error text is returned so it can be
            # shown to the user instead of propagating.
            return f"Error: {str(e)}\n{traceback.format_exc()}"
# Initialize agent
# Built once at import time; execute_user_task calls execute_task on this
# instance for every submitted message.
automation_agent = WebAutomationAgent()
def execute_user_task(message: str, history: List[List[str]]) -> tuple:
    """Process user message and execute task.

    Args:
        message: Text from the input box. Blank or missing input is ignored.
        history: Chat history as ``[user_text, agent_text]`` pairs; ``None``
            is treated as an empty history. A given list is extended in place.

    Returns:
        ``(history, "")`` - the updated history for the chatbot and an empty
        string that clears the input box.
    """
    # The chat value can arrive as None before any message exists.
    if history is None:
        history = []
    if not message or not message.strip():
        return history, ""
    # Add user message
    history.append([message, "🤖 Executing task..."])
    try:
        # Execute task
        result = automation_agent.execute_task(message)
        # Update with result
        history[-1][1] = result
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        history[-1][1] = f"❌ Error: {str(e)}"
    return history, ""
def clear_history():
    """Return the reset values for the chat display and the input box."""
    empty_chat = []
    empty_input = ""
    return empty_chat, empty_input
# Sample tasks
# Example instructions; the Gradio layout creates one button per entry that
# copies the text into the input box.
sample_tasks = [
    "Open google.com and search for 'web automation'",
    "Go to example.com and list all elements",
    "Navigate to a news website and find the main headline",
    "Open a shopping site and look for search functionality",
    "Visit github.com and find the login button"
]
def load_sample(task_text):
    """Return ``task_text`` unchanged (identity helper for sample-task text)."""
    selected = task_text
    return selected
# Create Gradio Interface
# Layout: header, chat display, a centered input row, a centered column with
# sample-task buttons and two info cards, then the event wiring.
with gr.Blocks(title="AI Web Agent", theme=gr.themes.Soft()) as demo:
    gr.HTML("""
    <div style="text-align: center; margin: 20px;">
        <h1>🤖 AI Web Automation Agent</h1>
        <p><strong>Julia Browser + CrewAI + Groq (Qwen-32B)</strong></p>
        <p>Give me web tasks in plain English!</p>
    </div>
    """)
    # Main chat interface - centered
    chatbot = gr.Chatbot(
        label="Agent Execution",
        height=600,
        show_copy_button=True
    )
    # Centered input section
    with gr.Row(elem_id="input-row"):
        with gr.Column(scale=1):
            pass  # Left spacer
        with gr.Column(scale=3):
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="Tell me what to do on the web...",
                    container=False,
                    scale=4,
                    elem_id="main-input"
                )
                send_btn = gr.Button("🚀 Execute", variant="primary", scale=1)
            clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
        with gr.Column(scale=1):
            pass  # Right spacer
    # Sample tasks section
    with gr.Row():
        with gr.Column(scale=1):
            pass  # Left spacer
        with gr.Column(scale=2):
            gr.HTML("<h3 style='text-align: center;'>📝 Sample Tasks</h3>")
            for i, task in enumerate(sample_tasks):
                # "secondary" replaces "outline", which is not one of the
                # variants gr.Button accepts.
                sample_btn = gr.Button(
                    f"Sample {i+1}: {task[:30]}...",
                    variant="secondary",
                    size="sm"
                )
                # The default argument binds the current task; a plain
                # closure would see only the last loop value.
                sample_btn.click(
                    lambda t=task: t,
                    outputs=user_input
                )
            with gr.Row():
                with gr.Column():
                    gr.HTML("""
                    <div style="padding: 15px; background: #f8f9fa; border-radius: 8px;">
                        <h4>💡 Tips:</h4>
                        <ul style="font-size: 12px;">
                            <li>Specify the website URL</li>
                            <li>Describe what to click/type</li>
                            <li>Ask for information extraction</li>
                            <li>Request form interactions</li>
                        </ul>
                    </div>
                    """)
                with gr.Column():
                    gr.HTML("""
                    <div style="padding: 15px; background: #e3f2fd; border-radius: 8px;">
                        <h4>⚙️ Setup:</h4>
                        <p style="font-size: 12px;">
                            Set GROQ_API_KEY:<br>
                            <code>export GROQ_API_KEY="gsk_..."</code>
                        </p>
                    </div>
                    """)
        with gr.Column(scale=1):
            pass  # Right spacer
    # Event handlers: the button and Enter in the textbox run the same
    # handler; both update the chat and clear the input.
    send_btn.click(
        execute_user_task,
        inputs=[user_input, chatbot],
        outputs=[chatbot, user_input]
    )
    user_input.submit(
        execute_user_task,
        inputs=[user_input, chatbot],
        outputs=[chatbot, user_input]
    )
    clear_btn.click(
        clear_history,
        outputs=[chatbot, user_input]
    )
if __name__ == "__main__":
    # Check for API key (warning only; startup continues without it)
    if not os.getenv("GROQ_API_KEY"):
        print("⚠️ Warning: GROQ_API_KEY not found in environment variables")
        print("Set it with: export GROQ_API_KEY='your_api_key_here'")
    print("🚀 Starting AI Web Automation Agent...")
    print("📋 Available browser tools:")
    for tool in browser_tools:
        print(f" - {tool.name}: {tool.description}")
    # For Hugging Face Spaces, use environment port if available.
    # `or 7860` also covers PORT being set to an empty string, which
    # int() would otherwise reject.
    port = int(os.getenv("PORT") or 7860)
    demo.launch(
        server_name="0.0.0.0",
        server_port=port,
        share=False,
        show_error=True
    )