Spaces:

ttomy
/

proxy-lite-demo-for-setup

Paused

App Files Community

XanderJC committed on Feb 17

Commit

2cedb9d

1 Parent(s): dcaf5c9

default endpoint

Browse files

Files changed (6) hide show

src/proxy_lite/agents/__init__.py +0 -3
src/proxy_lite/agents/browser_agent.py +0 -133
src/proxy_lite/configs/default.yaml +2 -2
src/proxy_lite/runner.py +5 -11
src/proxy_lite/solvers/__init__.py +0 -3
src/proxy_lite/solvers/structured_solver.py +0 -178

src/proxy_lite/agents/__init__.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from typing import Union
 from .agent_base import Agents, BaseAgent, BaseAgentConfig
-from .browser_agent import BrowserAgent, BrowserAgentConfig
 from .proxy_lite_agent import ProxyLiteAgent, ProxyLiteAgentConfig
 AgentTypes = Union[*list(Agents._agent_registry.values())]
@@ -14,8 +13,6 @@ __all__ = [
     "Agents",
     "BaseAgent",
     "BaseAgentConfig",
-    "BrowserAgent",
-    "BrowserAgentConfig",
     "ProxyLiteAgent",
     "ProxyLiteAgentConfig",
 ]

 from typing import Union
 from .agent_base import Agents, BaseAgent, BaseAgentConfig
 from .proxy_lite_agent import ProxyLiteAgent, ProxyLiteAgentConfig
 AgentTypes = Union[*list(Agents._agent_registry.values())]
     "Agents",
     "BaseAgent",
     "BaseAgentConfig",
     "ProxyLiteAgent",
     "ProxyLiteAgentConfig",
 ]

src/proxy_lite/agents/browser_agent.py DELETED Viewed

@@ -1,133 +0,0 @@
-from datetime import datetime
-from functools import cached_property
-from typing import Literal
-from pydantic import Field
-from proxy_lite.agents.agent_base import Agents, BaseAgent, BaseAgentConfig
-from proxy_lite.history import MessageHistory, MessageLabel, SystemMessage, Text
-from proxy_lite.tools import Tool
-BROWSER_AGENT_SYSTEM_PROMPT = """ **You are Proxy Lite, the Web-Browsing Agent.** You are developed by Convergence.
-**Current date:** {date_time_with_day}.
-You are given:
-1. A user task that you are trying to complete.
-2. Relevant facts we have at our disposal.
-3. A high level plan to complete the task.
-4. A history of previous actions and observations.
-5. An annotated webpage screenshot and text description of what's visible in the browser before and after the last action.
-## Objective
-You are an expert at controlling the web browser.
-You will be assisting a user with a task they are trying to complete on the web.
-## Web Screenshots
-Each iteration of your browsing loop, you'll be provided with a screenshot of the browser.
-The screenshot will have red rectangular annotations. These annotations highlight the marked elements you can interact with.
-## Mark IDs
-Each annotated element is labeled with a "mark id" in the top-left corner.
-When using tools like typing or clicking, specify the "mark id" to indicate which element you want to interact with.
-If an element is not annotated, you cannot interact with it. This is a limitation of the software. Focus on marked elements only.
-## Text Snippets
-Along with the screenshot, you will receive text snippets describing each annotated element.
-Here’s an example of different element types:
-- [0] `<a>text</a>` → Mark 0 is a link (`<a>` tag) containing the text "text".
-- [1] `<button>text</button>` → Mark 1 is a button (`<button>` tag) containing the text "text".
-- [2] `<input value="text"/>` → Mark 2 is an input field (`<input>` tag) with the value "text".
-- [3] `<select>text</select>` → Mark 3 is a dropdown menu (`<select>` tag) with the option "text" selected.
-- [4] `<textarea>text</textarea>` → Mark 4 is a text area (`<textarea>` tag) containing the text "text".
-- [5] `<li>text</li>` → Mark 5 is a list item (`<li>` tag) containing the text "text".
-- [6] `<div scrollable>text</div>` → Mark 6 is a division (`<div>` tag) containing the text "text" and is scrollable.
-- [7] `<td>text</td>` → Mark 7 is a table cell (`<td>` tag) containing the text "text".
-Note that these text snippets may be incomplete.
-## History
-You will see your past actions and observations but not old annotated webpages.
-This means annotated webpages showing useful information will not be visible in future actions.
-To get around this, key details from each webpage are stored in observations.
-## Web Browser Actions
-You can only take the following actions with the web browser:
-{tool_descriptions}
-## Important Browsing Tips
-If there is a modal overlay that is unresponsive on the page try reloading the webpage.
-If there is a cookie consent form covering part of the page just click accept on the form.
-When typing into a text field be sure to click one of the dropdown options (when present). Not selecting a dropdown option will result in the field being cleared after the next action.
-You do not have access any internet accounts (outside of those provided by the user).
-The browser has a built in CAPTCHA solver, if you are asked to solve one just wait and it will be solved for you.
-## Don't Repeat the Same Actions Continuously
-If you find yourself repeating an action without making progress, try another action.
-## Task
-You will now be connected to the user, who will give you their task."""  # noqa: E501
-MAX_MESSAGES_FOR_CONTEXT_WINDOW = {
-    MessageLabel.SCREENSHOT: 1,
-    # MessageLabel.REASONING_INDUCTION: 1,
-    # MessageLabel.FORMAT_INSTRUCTIONS: 1,
-    # MessageLabel.ACTION: 1,
-}
-@Agents.register_agent_config("browser")
-class BrowserAgentConfig(BaseAgentConfig):
-    name: Literal["browser"] = "browser"
-    history_messages_limit: dict[MessageLabel, int] = Field(
-        default_factory=lambda: MAX_MESSAGES_FOR_CONTEXT_WINDOW,
-    )
-@Agents.register_agent("browser")
-class BrowserAgent(BaseAgent):
-    config: BrowserAgentConfig
-    message_label: MessageLabel = MessageLabel.AGENT_MODEL_RESPONSE
-    def __init__(self, **data):
-        super().__init__(**data)
-    @property
-    def system_prompt(self) -> str:
-        return BROWSER_AGENT_SYSTEM_PROMPT.format(
-            date_time_with_day=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-            tool_descriptions=self.tool_descriptions,
-            memories="",
-        )
-    @cached_property
-    def tools(self) -> list[Tool]:
-        return self.env_tools
-    async def get_history_view(self) -> MessageHistory:
-        return MessageHistory(
-            messages=[SystemMessage(content=[Text(text=self.system_prompt)])],
-        ) + self.history.history_view(
-            limits=self.config.history_messages_limit,
-        )

src/proxy_lite/configs/default.yaml CHANGED Viewed

@@ -13,8 +13,8 @@ solver:
     name: proxy_lite
     client:
       name: convergence
-      model_id: convergence-ai/subset-distill-tools-7b-15-02-2025
-      api_base: http://slurm1-a3nodeset-4-1:8002/v1
 local_view: true
 task_timeout: 1800
 verbose: true

     name: proxy_lite
     client:
       name: convergence
+      model_id: convergence-ai/proxy-lite
+      api_base: https://convergence-ai-demo-api.hf.space/v1
 local_view: true
 task_timeout: 1800
 verbose: true

src/proxy_lite/runner.py CHANGED Viewed

@@ -208,8 +208,8 @@ if __name__ == "__main__":
             "environment": {
                 "name": "webbrowser",
                 "homepage": "https://www.google.com",
-                "viewport_width": 1920,
-                "viewport_height": 1080,
                 "screenshot_delay": 1,
                 "headless": False,
             },
@@ -219,10 +219,8 @@ if __name__ == "__main__":
                     "name": "proxy_lite",
                     "client": {
                         "name": "convergence",
-                        "model_id": "convergence-ai/all-distill-tools-7b-16-02-2025",
-                        "api_base": "http://slurm1-a3nodeset-4-1:8009/v1",
-                        #     # "model_id": "Qwen/Qwen2.5-VL-3B-Instruct",
-                        #     # "api_base": "http://0.0.0.0:8000/v1",
                     },
                 },
             },
@@ -236,10 +234,6 @@ if __name__ == "__main__":
     logger.info(f"🤖 [bold purple]Config:[/] {config}")
     runner = Runner(config=config)
-    result = asyncio.run(
-        runner.run(
-            "Tell me the tesla stock price"  # noqa: E501
-        )
-    )
     print(runner.run_result)
     print(runner.complete)

             "environment": {
                 "name": "webbrowser",
                 "homepage": "https://www.google.com",
+                "viewport_width": 1280,
+                "viewport_height": 1920,
                 "screenshot_delay": 1,
                 "headless": False,
             },
                     "name": "proxy_lite",
                     "client": {
                         "name": "convergence",
+                        "model_id": "convergence-ai/proxy-lite",
+                        "api_base": "https://convergence-ai-demo-api.hf.space/v1",
                     },
                 },
             },
     logger.info(f"🤖 [bold purple]Config:[/] {config}")
     runner = Runner(config=config)
+    result = asyncio.run(runner.run("Tell me the tesla stock price."))
     print(runner.run_result)
     print(runner.complete)

src/proxy_lite/solvers/__init__.py CHANGED Viewed

@@ -4,7 +4,6 @@ from typing import Union
 from .simple_solver import SimpleSolver, SimpleSolverConfig
 from .solver_base import BaseSolver, BaseSolverConfig, Solvers
-from .structured_solver import StructuredSolver, StructuredSolverConfig
 SolverConfigTypes = Union[*Solvers._solver_config_registry.values()]
 SolverTypes = Union[*Solvers._solver_registry.values()]
@@ -15,8 +14,6 @@ __all__ = [
     "BaseSolverConfig",
     "SimpleSolver",
     "SimpleSolverConfig",
-    "StructuredSolver",
-    "StructuredSolverConfig",
     "SolverConfigTypes",
     "SolverTypes",
     "Solvers",

 from .simple_solver import SimpleSolver, SimpleSolverConfig
 from .solver_base import BaseSolver, BaseSolverConfig, Solvers
 SolverConfigTypes = Union[*Solvers._solver_config_registry.values()]
 SolverTypes = Union[*Solvers._solver_registry.values()]
     "BaseSolverConfig",
     "SimpleSolver",
     "SimpleSolverConfig",
     "SolverConfigTypes",
     "SolverTypes",
     "Solvers",

src/proxy_lite/solvers/structured_solver.py DELETED Viewed

@@ -1,178 +0,0 @@
-# ruff: noqa: E501
-from functools import cached_property
-from typing import Literal, Optional
-from pydantic import BaseModel, Field
-from proxy_lite.agents import AgentConfigTypes, Agents, BaseAgent
-from proxy_lite.environments.environment_base import Action, Observation
-from proxy_lite.history import (
-    MessageHistory,
-    MessageLabel,
-    SystemMessage,
-)
-from proxy_lite.tools import Tool
-from .solver_base import BaseSolver, BaseSolverConfig, Solvers
-WEB_TOOL_TURN = """The browser action has been attempted. Please double check if the action was successful."""
-PLAN_USER_PROMPT = "First create a high-level plan to help solve the task on the web."
-ACTION_PROMPT = """Now take the most-promising next action in the browser.
-Only refer to the latest web elements from the latest screenshot.
-Using mark ids from older turns will lead to errors as they are no longer valid.
-Only interact with elements visible on the current webpage. Do not make up numbers or elements."""
-REASONING_PROMPT = """You will now follow these steps.
-1. **Make observations about the state of the webpage**:
-   - Consider the previous screenshot, your attempted previous action, and the current screenshot.
-   - Describe any changes you observe, and try to determine if the previous action succeeded.
-   - For example, if a form is being filled out, check whether the correct information is now displayed.
-2. **Write down any helpful facts you have gathered**:
-   - Describe any useful information on the webpage that might be helpful for completing the task.
-   - For example, if you are viewing a document, you may wish to note down any information you want to refer back to later.
-3. **Reason about the system's status**:
-   - Have you fully completed the task?
-4. **Select one of the following statuses**:
-   - "complete": if the task has been completed.
-   - "continue": if you are ready to continue without information or help.
-5. **Reason through next steps**:
-    - If the status is "continue", write down your reasoning for the next action you will take. You can only take one action at a time.
-    - If the status is not "continue", return an empty string.
-6. **Write a message to the user**:
-   - If the status is "complete", write a message to the user. If they asked a question in the task, make sure the answer is here. Otherwise, just provide other useful information about how the task went or if there was a problem in completing it.
-   - If the status is not "complete", set this to an empty string.
-Tips:
-- If you have already provided a response, don't provide it again.
-- If you notice you are repeating previous actions, you're likely stuck. Try something different."""
-class Reflection(BaseModel):
-    observation: str = Field(
-        ...,
-        description="Observation of the current browser state, including an assessment on the success of the last action (previous actions and observations are often wrong).",
-    )
-    fact_updates: list[str] = Field(
-        "",
-        description="List of new information relevant to the task that was found on the page, ignore input fields holding content you wrote.",
-    )
-    status_reasoning: str = Field(
-        ...,
-        description="Reasoning about the current state of the task.",
-    )
-    status: Literal["complete", "continue"] = Field(
-        ...,
-        description="Choose a system status based on your status reasoning.",
-    )
-    next_step_reasoning: str = Field(
-        ...,
-        description='If status is "continue", reason through the next action you will be taking (do not repeat actions over and over). Otherwise set to "".',
-    )
-    ending_message: str = Field(
-        ...,
-        description="If status is 'complete', write a message to the user. If they asked a question in the task, make sure the answer is here. Otherwise, just provide other useful information about how the task went or if there was a problem in completing it. If status is 'continue', set to ''.",
-    )
-@Solvers.register_solver_config("structured")
-class StructuredSolverConfig(BaseSolverConfig):
-    name: Literal["structured"] = "structured"
-    agent: AgentConfigTypes
-    start_with_plan: bool = True
-@Solvers.register_solver("structured")
-class StructuredSolver(BaseSolver):
-    task: Optional[str] = None
-    complete: bool = False
-    @cached_property
-    def tools(self) -> list[Tool]:
-        return self.env_tools
-    @cached_property
-    def local_tools(self) -> list[Tool]:
-        if self.sandbox:
-            return self.sandbox.tools
-        return []
-    @cached_property
-    def agent(self) -> BaseAgent:
-        self.logger.debug(f"Tools: {self.tools}")
-        return Agents.get(self.config.agent.name)(
-            config=self.config.agent,
-            env_tools=self.tools,
-        )
-    @property
-    def history(self) -> MessageHistory:
-        return MessageHistory(
-            messages=[SystemMessage.from_media(text=self.agent.system_prompt)] + self.agent.history.messages,
-        )
-    async def initialise(self, task: str, env_tools: list[Tool], env_info: str) -> None:
-        self.env_tools = env_tools
-        self.agent.receive_user_message(
-            text=env_info,
-            label=MessageLabel.USER_INPUT,
-        )
-        self.task = task
-        self.agent.receive_user_message(
-            text=f"Task: {task}",
-            label=MessageLabel.USER_INPUT,
-        )
-        if self.config.start_with_plan:
-            self.agent.receive_user_message(text=PLAN_USER_PROMPT, label=MessageLabel.PLAN)
-            await self.agent.generate_output(use_tool=False)
-    async def act(self, observation: Observation) -> Action:
-        if observation.state.tool_responses:
-            for tool_response in observation.state.tool_responses:
-                await self.agent.receive_tool_message(
-                    text=f"{WEB_TOOL_TURN}\n{tool_response.content}",
-                    tool_id=tool_response.id,
-                    label=MessageLabel.TOOL_RESULT_INDUCTION,
-                )
-        self.agent.receive_user_message(
-            image=observation.state.image,
-            text=observation.state.text,
-            label=MessageLabel.SCREENSHOT,
-            is_base64=True,
-        )
-        self.agent.receive_user_message(
-            text=REASONING_PROMPT,
-            label=MessageLabel.REASONING_INDUCTION,
-        )
-        message = await self.agent.generate_structured_output(model=Reflection)
-        self.logger.info(f"🌐 [bold blue]Observation:[/] {message.observation}")
-        if message.status == "complete":
-            self.complete = True
-            return Action(tool_calls=[], text=message.ending_message)
-        next_step = message.next_step_reasoning
-        self.agent.receive_user_message(
-            text=ACTION_PROMPT,
-            label=MessageLabel.ACTION,
-            is_base64=True,
-        )
-        message = await self.agent.generate_output(use_tool=True)
-        return Action(tool_calls=message.tool_calls, text=next_step)
-    async def is_complete(self, observation: Observation) -> bool:
-        env_terminated = observation.terminated
-        return self.complete or env_terminated