Spaces:

rootxhacker
/

Julia-browser-Agent

Running

App Files Files Community

rootxhacker committed 13 days ago

Commit

dfb38af

verified ·

1 Parent(s): e10a54e

Update app.py

Browse files

Files changed (1) hide show

app.py +281 -171

app.py CHANGED Viewed

@@ -1,177 +1,291 @@
 #!/usr/bin/env python3
 import gradio as gr
 import os
-from typing import Dict, List, Any, Optional
 import json
-from datetime import datetime
 import traceback
-# CrewAI and Julia Browser imports
-from crewai import Agent, Task, Crew, Process
-from crewai_tools import BaseTool
-from crewai.llm import LLM
 from julia_browser import AgentSDK
-# Initialize browser
 browser = AgentSDK()
-class OpenWebsiteTool(BaseTool):
-    name: str = "open_website"
-    description: str = "Open a website and get page content. Input: url (string)"
-    def _run(self, url: str) -> str:
-        result = browser.open_website(url)
-        return f"Opened: {result['title']} at {url}"
-class ListElementsTool(BaseTool):
-    name: str = "list_elements"
-    description: str = "List all clickable elements and input fields on current page"
-    def _run(self) -> str:
-        elements = browser.list_elements()
-        output = []
-        for elem in elements.get("elements", []):
-            output.append(f"[{elem['id']}] {elem['type']}: {elem.get('text', 'N/A')}")
-        return f"Found {elements['total_clickable']} clickable, {elements['total_inputs']} inputs:\n" + "\n".join(output)
-class ClickElementTool(BaseTool):
-    name: str = "click_element"
-    description: str = "Click a button or link by its number ID. Input: element_id (int)"
-    def _run(self, element_id: int) -> str:
-        result = browser.click_element(element_id)
-        return f"Clicked: {result.get('element', 'Unknown')} - {result['status']}"
-class TypeTextTool(BaseTool):
-    name: str = "type_text"
-    description: str = "Type text into an input field. Input: field_id (int), text (string)"
-    def _run(self, field_id: int, text: str) -> str:
-        result = browser.type_text(field_id, text)
-        return f"Typed '{text}' into field {field_id} - {result['status']}"
-class SubmitFormTool(BaseTool):
-    name: str = "submit_form"
-    description: str = "Submit the current form with typed data"
-    def _run(self) -> str:
-        result = browser.submit_form()
-        return f"Form submitted - New page: {result.get('title', 'Unknown')}"
-class GetPageInfoTool(BaseTool):
-    name: str = "get_page_info"
-    description: str = "Get current page title, URL, and content"
-    def _run(self) -> str:
-        info = browser.get_page_info()
-        return f"Title: {info['title']}\nURL: {info['url']}\nContent: {info['content'][:300]}..."
-class ScrollDownTool(BaseTool):
-    name: str = "scroll_down"
-    description: str = "Scroll down the page. Input: chunks (int, default 1)"
-    def _run(self, chunks: int = 1) -> str:
-        result = browser.scroll_down(chunks)
-        return f"Scrolled down {chunks} chunks - Position: {result['position']}"
-class ScrollUpTool(BaseTool):
-    name: str = "scroll_up"
-    description: str = "Scroll up the page. Input: chunks (int, default 1)"
-    def _run(self, chunks: int = 1) -> str:
-        result = browser.scroll_up(chunks)
-        return f"Scrolled up {chunks} chunks - Position: {result['position']}"
-class SearchPageTool(BaseTool):
-    name: str = "search_page"
-    description: str = "Search for text within current page. Input: term (string)"
-    def _run(self, term: str) -> str:
-        result = browser.search_page(term)
-        return f"Found {result.get('matches', 0)} matches for '{term}'"
-class FollowLinkTool(BaseTool):
-    name: str = "follow_link"
-    description: str = "Navigate to a link by its number. Input: link_id (int)"
-    def _run(self, link_id: int) -> str:
-        result = browser.follow_link(link_id)
-        return f"Followed link {link_id} - Now at: {result.get('title', 'Unknown')}"
-# Initialize all tools
-browser_tools = [
-    OpenWebsiteTool(),
-    ListElementsTool(),
-    ClickElementTool(),
-    TypeTextTool(),
-    SubmitFormTool(),
-    GetPageInfoTool(),
-    ScrollDownTool(),
-    ScrollUpTool(),
-    SearchPageTool(),
-    FollowLinkTool()
-]
 class WebAutomationAgent:
     def __init__(self):
-        # Configure Groq LLM
-        self.llm = LLM(
-            model="groq/qwen2.5-32b-instruct",
-            api_key=os.getenv("GROQ_API_KEY")
-        )
-        # Create web automation agent
-        self.agent = Agent(
-            role="Web Automation Expert",
-            goal="Execute web tasks using browser tools based on user instructions",
-            backstory="""You are a skilled web automation expert who can navigate websites,
-            interact with elements, fill forms, and extract information. You break down
-            complex tasks into simple browser actions and execute them step by step.""",
-            tools=browser_tools,
-            llm=self.llm,
-            verbose=True,
-            allow_delegation=False
-        )
-    def execute_task(self, instruction: str) -> str:
-        """Execute user task"""
-        task = Task(
-            description=f"""
-            Task: {instruction}
-            Use the available browser tools to complete this task:
-            - open_website(url) - Open websites
-            - list_elements() - See what's clickable on page
-            - click_element(id) - Click buttons/links
-            - type_text(field_id, text) - Fill input fields
-            - submit_form() - Submit forms
-            - get_page_info() - Get page details
-            - scroll_down(chunks) - Scroll to see more
-            - search_page(term) - Find text on page
-            - follow_link(id) - Navigate to links
-            Work step by step and explain what you're doing.
-            """,
-            agent=self.agent,
-            expected_output="Complete step-by-step execution report with results"
-        )
-        crew = Crew(
-            agents=[self.agent],
-            tasks=[task],
-            process=Process.sequential,
-            verbose=True
-        )
         try:
-            result = crew.kickoff()
-            return str(result)
         except Exception as e:
-            return f"Error: {str(e)}\n{traceback.format_exc()}"
 # Initialize agent
-automation_agent = WebAutomationAgent()
 def execute_user_task(message: str, history: List[List[str]]) -> tuple:
     """Process user message and execute task"""
@@ -183,11 +297,11 @@ def execute_user_task(message: str, history: List[List[str]]) -> tuple:
     try:
         # Execute task
-        result = automation_agent.execute_task(message)
         # Update with result
         history[-1][1] = result
     except Exception as e:
-        history[-1][1] = f"❌ Error: {str(e)}"
     return history, ""
@@ -197,27 +311,24 @@ def clear_history():
 # Sample tasks
 sample_tasks = [
     "Open google.com and search for 'web automation'",
-    "Go to example.com and list all elements",
-    "Navigate to a news website and find the main headline",
-    "Open a shopping site and look for search functionality",
-    "Visit github.com and find the login button"
 ]
-def load_sample(task_text):
-    return task_text
 # Create Gradio Interface
 with gr.Blocks(title="AI Web Agent", theme=gr.themes.Soft()) as demo:
     gr.HTML("""
     <div style="text-align: center; margin: 20px;">
         <h1>🤖 AI Web Automation Agent</h1>
-        <p><strong>Julia Browser + CrewAI + Groq (Qwen-32B)</strong></p>
-        <p>Give me web tasks in plain English!</p>
     </div>
     """)
-    # Main chat interface - centered
     chatbot = gr.Chatbot(
         label="Agent Execution",
         height=600,
@@ -225,7 +336,7 @@ with gr.Blocks(title="AI Web Agent", theme=gr.themes.Soft()) as demo:
     )
     # Centered input section
-    with gr.Row(elem_id="input-row"):
         with gr.Column(scale=1):
             pass  # Left spacer
         with gr.Column(scale=3):
@@ -233,8 +344,7 @@ with gr.Blocks(title="AI Web Agent", theme=gr.themes.Soft()) as demo:
                 user_input = gr.Textbox(
                     placeholder="Tell me what to do on the web...",
                     container=False,
-                    scale=4,
-                    elem_id="main-input"
                 )
                 send_btn = gr.Button("🚀 Execute", variant="primary", scale=1)
@@ -251,7 +361,7 @@ with gr.Blocks(title="AI Web Agent", theme=gr.themes.Soft()) as demo:
             for i, task in enumerate(sample_tasks):
                 sample_btn = gr.Button(
-                    f"Sample {i+1}: {task[:30]}...",
                     variant="outline",
                     size="sm"
                 )
@@ -264,12 +374,12 @@ with gr.Blocks(title="AI Web Agent", theme=gr.themes.Soft()) as demo:
                 with gr.Column():
                     gr.HTML("""
                     <div style="padding: 15px; background: #f8f9fa; border-radius: 8px;">
-                        <h4>💡 Tips:</h4>
                         <ul style="font-size: 12px;">
-                            <li>Specify the website URL</li>
-                            <li>Describe what to click/type</li>
-                            <li>Ask for information extraction</li>
-                            <li>Request form interactions</li>
                         </ul>
                     </div>
                     """)
@@ -311,12 +421,12 @@ if __name__ == "__main__":
         print("⚠️ Warning: GROQ_API_KEY not found in environment variables")
         print("Set it with: export GROQ_API_KEY='your_api_key_here'")
-    print("🚀 Starting AI Web Automation Agent...")
-    print("📝 Available browser tools:")
-    for tool in browser_tools:
-        print(f"   - {tool.name}: {tool.description}")
-    # For Hugging Face Spaces, use environment port if available
     port = int(os.getenv("PORT", 7860))
     demo.launch(

 #!/usr/bin/env python3
+"""
+AI Web Agent using Julia Browser with Direct Groq Integration
+No CrewAI - Pure implementation with function calling
+"""
 import gradio as gr
 import os
 import json
+from typing import Dict, List, Any, Optional
 import traceback
+from groq import Groq
 from julia_browser import AgentSDK
+# Initialize browser and Groq client
 browser = AgentSDK()
+groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+class BrowserActions:
+    """Direct browser action implementations"""
+    @staticmethod
+    def open_website(url: str) -> Dict[str, Any]:
+        """Open a website"""
+        try:
+            result = browser.open_website(url)
+            return {"success": True, "message": f"Opened: {result['title']} at {url}", "data": result}
+        except Exception as e:
+            return {"success": False, "message": f"Error opening {url}: {str(e)}"}
+    @staticmethod
+    def list_elements() -> Dict[str, Any]:
+        """List all interactive elements"""
+        try:
+            elements = browser.list_elements()
+            element_list = []
+            for elem in elements.get("elements", []):
+                element_list.append(f"[{elem['id']}] {elem['type']}: {elem.get('text', 'N/A')}")
+            message = f"Found {elements['total_clickable']} clickable, {elements['total_inputs']} inputs:\n" + "\n".join(element_list)
+            return {"success": True, "message": message, "data": elements}
+        except Exception as e:
+            return {"success": False, "message": f"Error listing elements: {str(e)}"}
+    @staticmethod
+    def click_element(element_id: int) -> Dict[str, Any]:
+        """Click an element by ID"""
+        try:
+            result = browser.click_element(element_id)
+            return {"success": True, "message": f"Clicked: {result.get('element', 'Unknown')} - {result['status']}", "data": result}
+        except Exception as e:
+            return {"success": False, "message": f"Error clicking element {element_id}: {str(e)}"}
+    @staticmethod
+    def type_text(field_id: int, text: str) -> Dict[str, Any]:
+        """Type text into input field"""
+        try:
+            result = browser.type_text(field_id, text)
+            return {"success": True, "message": f"Typed '{text}' into field {field_id} - {result['status']}", "data": result}
+        except Exception as e:
+            return {"success": False, "message": f"Error typing into field {field_id}: {str(e)}"}
+    @staticmethod
+    def submit_form() -> Dict[str, Any]:
+        """Submit current form"""
+        try:
+            result = browser.submit_form()
+            return {"success": True, "message": f"Form submitted - New page: {result.get('title', 'Unknown')}", "data": result}
+        except Exception as e:
+            return {"success": False, "message": f"Error submitting form: {str(e)}"}
+    @staticmethod
+    def get_page_info() -> Dict[str, Any]:
+        """Get current page information"""
+        try:
+            info = browser.get_page_info()
+            message = f"Title: {info['title']}\nURL: {info['url']}\nContent preview: {info['content'][:200]}..."
+            return {"success": True, "message": message, "data": info}
+        except Exception as e:
+            return {"success": False, "message": f"Error getting page info: {str(e)}"}
+    @staticmethod
+    def scroll_down(chunks: int = 1) -> Dict[str, Any]:
+        """Scroll down the page"""
+        try:
+            result = browser.scroll_down(chunks)
+            return {"success": True, "message": f"Scrolled down {chunks} chunks - Position: {result['position']}", "data": result}
+        except Exception as e:
+            return {"success": False, "message": f"Error scrolling down: {str(e)}"}
+    @staticmethod
+    def search_page(term: str) -> Dict[str, Any]:
+        """Search for text on current page"""
+        try:
+            result = browser.search_page(term)
+            return {"success": True, "message": f"Found {result.get('matches', 0)} matches for '{term}'", "data": result}
+        except Exception as e:
+            return {"success": False, "message": f"Error searching page: {str(e)}"}
+# Registry of the browser actions exposed to the model as tools.
+# Keyed by tool name. For each entry:
+#   "function"    - the BrowserActions callable that the agent invokes with
+#                   the model-supplied arguments as keyword arguments
+#   "description" - text forwarded to the model in the tool definition
+#   "parameters"  - JSON-Schema object describing the accepted arguments,
+#                   also forwarded in the tool definition
+# Property names must match the callable's parameter names, because the
+# parsed arguments are passed with ** unpacking.
+AVAILABLE_FUNCTIONS = {
+    "open_website": {
+        "function": BrowserActions.open_website,
+        "description": "Open a website",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "url": {"type": "string", "description": "The URL to open"}
+            },
+            "required": ["url"]
+        }
+    },
+    # Takes no arguments: empty properties object.
+    "list_elements": {
+        "function": BrowserActions.list_elements,
+        "description": "List all clickable elements and input fields on current page",
+        "parameters": {"type": "object", "properties": {}}
+    },
+    "click_element": {
+        "function": BrowserActions.click_element,
+        "description": "Click an element by its ID number",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "element_id": {"type": "integer", "description": "The ID number of the element to click"}
+            },
+            "required": ["element_id"]
+        }
+    },
+    "type_text": {
+        "function": BrowserActions.type_text,
+        "description": "Type text into an input field",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "field_id": {"type": "integer", "description": "The ID of the input field"},
+                "text": {"type": "string", "description": "The text to type"}
+            },
+            "required": ["field_id", "text"]
+        }
+    },
+    # Takes no arguments: empty properties object.
+    "submit_form": {
+        "function": BrowserActions.submit_form,
+        "description": "Submit the current form",
+        "parameters": {"type": "object", "properties": {}}
+    },
+    # Takes no arguments: empty properties object.
+    "get_page_info": {
+        "function": BrowserActions.get_page_info,
+        "description": "Get current page title, URL and content",
+        "parameters": {"type": "object", "properties": {}}
+    },
+    # "chunks" is optional (not listed under "required"); the schema default
+    # of 1 matches the default of BrowserActions.scroll_down.
+    "scroll_down": {
+        "function": BrowserActions.scroll_down,
+        "description": "Scroll down the page",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "chunks": {"type": "integer", "description": "Number of chunks to scroll", "default": 1}
+            }
+        }
+    },
+    "search_page": {
+        "function": BrowserActions.search_page,
+        "description": "Search for text within the current page",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "term": {"type": "string", "description": "Text to search for"}
+            },
+            "required": ["term"]
+        }
+    }
+}
 class WebAutomationAgent:
+    """AI Web Automation Agent with direct Groq integration"""
     def __init__(self):
+        if not os.getenv("GROQ_API_KEY"):
+            raise ValueError("GROQ_API_KEY environment variable is required")
+    def execute_task(self, user_instruction: str) -> str:
+        """Execute a web automation task using function calling"""
+        # Prepare function definitions for Groq
+        functions = []
+        for name, func_info in AVAILABLE_FUNCTIONS.items():
+            functions.append({
+                "type": "function",
+                "function": {
+                    "name": name,
+                    "description": func_info["description"],
+                    "parameters": func_info["parameters"]
+                }
+            })
+        # System prompt
+        system_prompt = """You are a web automation expert. Execute the user's web automation task step by step using the available browser functions.
+Available functions:
+- open_website(url) - Open any website
+- list_elements() - See all clickable elements and inputs on page
+- click_element(element_id) - Click buttons, links by their ID number
+- type_text(field_id, text) - Type into input fields by ID
+- submit_form() - Submit forms
+- get_page_info() - Get page details
+- scroll_down(chunks) - Scroll to see more content
+- search_page(term) - Find text on current page
+Work step by step:
+1. First understand what the user wants
+2. Open the website if needed
+3. List elements to see what's available
+4. Interact with elements as needed
+5. Provide clear feedback on each step
+Always explain what you're doing and why."""
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_instruction}
+        ]
+        execution_log = []
+        max_iterations = 10
         try:
+            for iteration in range(max_iterations):
+                # Call Groq with function calling
+                response = groq_client.chat.completions.create(
+                    model="qwen2.5-32b-instruct",
+                    messages=messages,
+                    tools=functions,
+                    tool_choice="auto",
+                    max_tokens=1000,
+                    temperature=0.1
+                )
+                message = response.choices[0].message
+                # Add assistant message to conversation
+                messages.append({
+                    "role": "assistant",
+                    "content": message.content,
+                    "tool_calls": message.tool_calls
+                })
+                # Log assistant response
+                if message.content:
+                    execution_log.append(f"🤖 **AI**: {message.content}")
+                # Execute function calls if any
+                if message.tool_calls:
+                    for tool_call in message.tool_calls:
+                        function_name = tool_call.function.name
+                        function_args = json.loads(tool_call.function.arguments)
+                        execution_log.append(f"🔧 **Executing**: {function_name}({function_args})")
+                        # Execute the function
+                        if function_name in AVAILABLE_FUNCTIONS:
+                            try:
+                                result = AVAILABLE_FUNCTIONS[function_name]["function"](**function_args)
+                                execution_log.append(f"✅ **Result**: {result['message']}")
+                                # Add function result to conversation
+                                messages.append({
+                                    "role": "tool",
+                                    "tool_call_id": tool_call.id,
+                                    "content": json.dumps(result)
+                                })
+                            except Exception as e:
+                                error_msg = f"Error executing {function_name}: {str(e)}"
+                                execution_log.append(f"❌ **Error**: {error_msg}")
+                                messages.append({
+                                    "role": "tool",
+                                    "tool_call_id": tool_call.id,
+                                    "content": json.dumps({"success": False, "message": error_msg})
+                                })
+                else:
+                    # No more function calls, task completed
+                    break
+            return "\n\n".join(execution_log)
         except Exception as e:
+            return f"❌ **Error**: {str(e)}\n\n{traceback.format_exc()}"
 # Initialize agent
+agent = WebAutomationAgent()
 def execute_user_task(message: str, history: List[List[str]]) -> tuple:
     """Process user message and execute task"""
     try:
         # Execute task
+        result = agent.execute_task(message)
         # Update with result
         history[-1][1] = result
     except Exception as e:
+        history[-1][1] = f"❌ **Error**: {str(e)}"
     return history, ""
 # Sample tasks
 sample_tasks = [
     "Open google.com and search for 'web automation'",
+    "Go to example.com and list all elements on the page",
+    "Navigate to github.com and find the login button",
+    "Open a news website and get the page information",
+    "Visit stackoverflow.com and scroll down to see more content"
 ]
 # Create Gradio Interface
 with gr.Blocks(title="AI Web Agent", theme=gr.themes.Soft()) as demo:
     gr.HTML("""
     <div style="text-align: center; margin: 20px;">
         <h1>🤖 AI Web Automation Agent</h1>
+        <p><strong>Julia Browser + Direct Groq Integration (Qwen-32B)</strong></p>
+        <p>Pure implementation without CrewAI - Function calling with Groq!</p>
     </div>
     """)
+    # Main chat interface
     chatbot = gr.Chatbot(
         label="Agent Execution",
         height=600,
     )
     # Centered input section
+    with gr.Row():
         with gr.Column(scale=1):
             pass  # Left spacer
         with gr.Column(scale=3):
                 user_input = gr.Textbox(
                     placeholder="Tell me what to do on the web...",
                     container=False,
+                    scale=4
                 )
                 send_btn = gr.Button("🚀 Execute", variant="primary", scale=1)
             for i, task in enumerate(sample_tasks):
                 sample_btn = gr.Button(
+                    f"Sample {i+1}: {task[:35]}...",
                     variant="outline",
                     size="sm"
                 )
                 with gr.Column():
                     gr.HTML("""
                     <div style="padding: 15px; background: #f8f9fa; border-radius: 8px;">
+                        <h4>💡 Features:</h4>
                         <ul style="font-size: 12px;">
+                            <li>Direct Groq function calling</li>
+                            <li>No CrewAI dependencies</li>
+                            <li>Step-by-step execution</li>
+                            <li>Real browser automation</li>
                         </ul>
                     </div>
                     """)
         print("⚠️ Warning: GROQ_API_KEY not found in environment variables")
         print("Set it with: export GROQ_API_KEY='your_api_key_here'")
+    print("🚀 Starting AI Web Automation Agent (Direct Implementation)...")
+    print("📝 Available browser functions:")
+    for name, info in AVAILABLE_FUNCTIONS.items():
+        print(f"   - {name}: {info['description']}")
+    # For Hugging Face Spaces
     port = int(os.getenv("PORT", 7860))
     demo.launch(