default endpoint
Browse files
src/proxy_lite/agents/__init__.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from typing import Union
|
| 2 |
|
| 3 |
from .agent_base import Agents, BaseAgent, BaseAgentConfig
|
| 4 |
-
from .browser_agent import BrowserAgent, BrowserAgentConfig
|
| 5 |
from .proxy_lite_agent import ProxyLiteAgent, ProxyLiteAgentConfig
|
| 6 |
|
| 7 |
AgentTypes = Union[*list(Agents._agent_registry.values())]
|
|
@@ -14,8 +13,6 @@ __all__ = [
|
|
| 14 |
"Agents",
|
| 15 |
"BaseAgent",
|
| 16 |
"BaseAgentConfig",
|
| 17 |
-
"BrowserAgent",
|
| 18 |
-
"BrowserAgentConfig",
|
| 19 |
"ProxyLiteAgent",
|
| 20 |
"ProxyLiteAgentConfig",
|
| 21 |
]
|
|
|
|
| 1 |
from typing import Union
|
| 2 |
|
| 3 |
from .agent_base import Agents, BaseAgent, BaseAgentConfig
|
|
|
|
| 4 |
from .proxy_lite_agent import ProxyLiteAgent, ProxyLiteAgentConfig
|
| 5 |
|
| 6 |
AgentTypes = Union[*list(Agents._agent_registry.values())]
|
|
|
|
| 13 |
"Agents",
|
| 14 |
"BaseAgent",
|
| 15 |
"BaseAgentConfig",
|
|
|
|
|
|
|
| 16 |
"ProxyLiteAgent",
|
| 17 |
"ProxyLiteAgentConfig",
|
| 18 |
]
|
src/proxy_lite/agents/browser_agent.py
DELETED
|
@@ -1,133 +0,0 @@
|
|
| 1 |
-
from datetime import datetime
|
| 2 |
-
from functools import cached_property
|
| 3 |
-
from typing import Literal
|
| 4 |
-
|
| 5 |
-
from pydantic import Field
|
| 6 |
-
|
| 7 |
-
from proxy_lite.agents.agent_base import Agents, BaseAgent, BaseAgentConfig
|
| 8 |
-
from proxy_lite.history import MessageHistory, MessageLabel, SystemMessage, Text
|
| 9 |
-
from proxy_lite.tools import Tool
|
| 10 |
-
|
| 11 |
-
BROWSER_AGENT_SYSTEM_PROMPT = """ **You are Proxy Lite, the Web-Browsing Agent.** You are developed by Convergence.
|
| 12 |
-
|
| 13 |
-
**Current date:** {date_time_with_day}.
|
| 14 |
-
|
| 15 |
-
You are given:
|
| 16 |
-
|
| 17 |
-
1. A user task that you are trying to complete.
|
| 18 |
-
2. Relevant facts we have at our disposal.
|
| 19 |
-
3. A high level plan to complete the task.
|
| 20 |
-
4. A history of previous actions and observations.
|
| 21 |
-
5. An annotated webpage screenshot and text description of what's visible in the browser before and after the last action.
|
| 22 |
-
|
| 23 |
-
## Objective
|
| 24 |
-
|
| 25 |
-
You are an expert at controlling the web browser.
|
| 26 |
-
You will be assisting a user with a task they are trying to complete on the web.
|
| 27 |
-
|
| 28 |
-
## Web Screenshots
|
| 29 |
-
|
| 30 |
-
Each iteration of your browsing loop, you'll be provided with a screenshot of the browser.
|
| 31 |
-
|
| 32 |
-
The screenshot will have red rectangular annotations. These annotations highlight the marked elements you can interact with.
|
| 33 |
-
|
| 34 |
-
## Mark IDs
|
| 35 |
-
|
| 36 |
-
Each annotated element is labeled with a "mark id" in the top-left corner.
|
| 37 |
-
|
| 38 |
-
When using tools like typing or clicking, specify the "mark id" to indicate which element you want to interact with.
|
| 39 |
-
|
| 40 |
-
If an element is not annotated, you cannot interact with it. This is a limitation of the software. Focus on marked elements only.
|
| 41 |
-
|
| 42 |
-
## Text Snippets
|
| 43 |
-
|
| 44 |
-
Along with the screenshot, you will receive text snippets describing each annotated element.
|
| 45 |
-
|
| 46 |
-
Here’s an example of different element types:
|
| 47 |
-
|
| 48 |
-
- [0] `<a>text</a>` → Mark 0 is a link (`<a>` tag) containing the text "text".
|
| 49 |
-
- [1] `<button>text</button>` → Mark 1 is a button (`<button>` tag) containing the text "text".
|
| 50 |
-
- [2] `<input value="text"/>` → Mark 2 is an input field (`<input>` tag) with the value "text".
|
| 51 |
-
- [3] `<select>text</select>` → Mark 3 is a dropdown menu (`<select>` tag) with the option "text" selected.
|
| 52 |
-
- [4] `<textarea>text</textarea>` → Mark 4 is a text area (`<textarea>` tag) containing the text "text".
|
| 53 |
-
- [5] `<li>text</li>` → Mark 5 is a list item (`<li>` tag) containing the text "text".
|
| 54 |
-
- [6] `<div scrollable>text</div>` → Mark 6 is a division (`<div>` tag) containing the text "text" and is scrollable.
|
| 55 |
-
- [7] `<td>text</td>` → Mark 7 is a table cell (`<td>` tag) containing the text "text".
|
| 56 |
-
|
| 57 |
-
Note that these text snippets may be incomplete.
|
| 58 |
-
|
| 59 |
-
## History
|
| 60 |
-
|
| 61 |
-
You will see your past actions and observations but not old annotated webpages.
|
| 62 |
-
|
| 63 |
-
This means annotated webpages showing useful information will not be visible in future actions.
|
| 64 |
-
|
| 65 |
-
To get around this, key details from each webpage are stored in observations.
|
| 66 |
-
|
| 67 |
-
## Web Browser Actions
|
| 68 |
-
|
| 69 |
-
You can only take the following actions with the web browser:
|
| 70 |
-
{tool_descriptions}
|
| 71 |
-
|
| 72 |
-
## Important Browsing Tips
|
| 73 |
-
|
| 74 |
-
If there is a modal overlay that is unresponsive on the page try reloading the webpage.
|
| 75 |
-
|
| 76 |
-
If there is a cookie consent form covering part of the page just click accept on the form.
|
| 77 |
-
|
| 78 |
-
When typing into a text field be sure to click one of the dropdown options (when present). Not selecting a dropdown option will result in the field being cleared after the next action.
|
| 79 |
-
|
| 80 |
-
You do not have access any internet accounts (outside of those provided by the user).
|
| 81 |
-
|
| 82 |
-
The browser has a built in CAPTCHA solver, if you are asked to solve one just wait and it will be solved for you.
|
| 83 |
-
|
| 84 |
-
## Don't Repeat the Same Actions Continuously
|
| 85 |
-
|
| 86 |
-
If you find yourself repeating an action without making progress, try another action.
|
| 87 |
-
|
| 88 |
-
## Task
|
| 89 |
-
|
| 90 |
-
You will now be connected to the user, who will give you their task.""" # noqa: E501
|
| 91 |
-
|
| 92 |
-
MAX_MESSAGES_FOR_CONTEXT_WINDOW = {
|
| 93 |
-
MessageLabel.SCREENSHOT: 1,
|
| 94 |
-
# MessageLabel.REASONING_INDUCTION: 1,
|
| 95 |
-
# MessageLabel.FORMAT_INSTRUCTIONS: 1,
|
| 96 |
-
# MessageLabel.ACTION: 1,
|
| 97 |
-
}
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
@Agents.register_agent_config("browser")
|
| 101 |
-
class BrowserAgentConfig(BaseAgentConfig):
|
| 102 |
-
name: Literal["browser"] = "browser"
|
| 103 |
-
history_messages_limit: dict[MessageLabel, int] = Field(
|
| 104 |
-
default_factory=lambda: MAX_MESSAGES_FOR_CONTEXT_WINDOW,
|
| 105 |
-
)
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
@Agents.register_agent("browser")
|
| 109 |
-
class BrowserAgent(BaseAgent):
|
| 110 |
-
config: BrowserAgentConfig
|
| 111 |
-
message_label: MessageLabel = MessageLabel.AGENT_MODEL_RESPONSE
|
| 112 |
-
|
| 113 |
-
def __init__(self, **data):
|
| 114 |
-
super().__init__(**data)
|
| 115 |
-
|
| 116 |
-
@property
|
| 117 |
-
def system_prompt(self) -> str:
|
| 118 |
-
return BROWSER_AGENT_SYSTEM_PROMPT.format(
|
| 119 |
-
date_time_with_day=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
| 120 |
-
tool_descriptions=self.tool_descriptions,
|
| 121 |
-
memories="",
|
| 122 |
-
)
|
| 123 |
-
|
| 124 |
-
@cached_property
|
| 125 |
-
def tools(self) -> list[Tool]:
|
| 126 |
-
return self.env_tools
|
| 127 |
-
|
| 128 |
-
async def get_history_view(self) -> MessageHistory:
|
| 129 |
-
return MessageHistory(
|
| 130 |
-
messages=[SystemMessage(content=[Text(text=self.system_prompt)])],
|
| 131 |
-
) + self.history.history_view(
|
| 132 |
-
limits=self.config.history_messages_limit,
|
| 133 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/proxy_lite/configs/default.yaml
CHANGED
|
@@ -13,8 +13,8 @@ solver:
|
|
| 13 |
name: proxy_lite
|
| 14 |
client:
|
| 15 |
name: convergence
|
| 16 |
-
model_id: convergence-ai/
|
| 17 |
-
api_base:
|
| 18 |
local_view: true
|
| 19 |
task_timeout: 1800
|
| 20 |
verbose: true
|
|
|
|
| 13 |
name: proxy_lite
|
| 14 |
client:
|
| 15 |
name: convergence
|
| 16 |
+
model_id: convergence-ai/proxy-lite
|
| 17 |
+
api_base: https://convergence-ai-demo-api.hf.space/v1
|
| 18 |
local_view: true
|
| 19 |
task_timeout: 1800
|
| 20 |
verbose: true
|
src/proxy_lite/runner.py
CHANGED
|
@@ -208,8 +208,8 @@ if __name__ == "__main__":
|
|
| 208 |
"environment": {
|
| 209 |
"name": "webbrowser",
|
| 210 |
"homepage": "https://www.google.com",
|
| 211 |
-
"viewport_width":
|
| 212 |
-
"viewport_height":
|
| 213 |
"screenshot_delay": 1,
|
| 214 |
"headless": False,
|
| 215 |
},
|
|
@@ -219,10 +219,8 @@ if __name__ == "__main__":
|
|
| 219 |
"name": "proxy_lite",
|
| 220 |
"client": {
|
| 221 |
"name": "convergence",
|
| 222 |
-
"model_id": "convergence-ai/
|
| 223 |
-
"api_base": "
|
| 224 |
-
# # "model_id": "Qwen/Qwen2.5-VL-3B-Instruct",
|
| 225 |
-
# # "api_base": "http://0.0.0.0:8000/v1",
|
| 226 |
},
|
| 227 |
},
|
| 228 |
},
|
|
@@ -236,10 +234,6 @@ if __name__ == "__main__":
|
|
| 236 |
logger.info(f"🤖 [bold purple]Config:[/] {config}")
|
| 237 |
|
| 238 |
runner = Runner(config=config)
|
| 239 |
-
result = asyncio.run(
|
| 240 |
-
runner.run(
|
| 241 |
-
"Tell me the tesla stock price" # noqa: E501
|
| 242 |
-
)
|
| 243 |
-
)
|
| 244 |
print(runner.run_result)
|
| 245 |
print(runner.complete)
|
|
|
|
| 208 |
"environment": {
|
| 209 |
"name": "webbrowser",
|
| 210 |
"homepage": "https://www.google.com",
|
| 211 |
+
"viewport_width": 1280,
|
| 212 |
+
"viewport_height": 1920,
|
| 213 |
"screenshot_delay": 1,
|
| 214 |
"headless": False,
|
| 215 |
},
|
|
|
|
| 219 |
"name": "proxy_lite",
|
| 220 |
"client": {
|
| 221 |
"name": "convergence",
|
| 222 |
+
"model_id": "convergence-ai/proxy-lite",
|
| 223 |
+
"api_base": "https://convergence-ai-demo-api.hf.space/v1",
|
|
|
|
|
|
|
| 224 |
},
|
| 225 |
},
|
| 226 |
},
|
|
|
|
| 234 |
logger.info(f"🤖 [bold purple]Config:[/] {config}")
|
| 235 |
|
| 236 |
runner = Runner(config=config)
|
| 237 |
+
result = asyncio.run(runner.run("Tell me the tesla stock price."))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
print(runner.run_result)
|
| 239 |
print(runner.complete)
|
src/proxy_lite/solvers/__init__.py
CHANGED
|
@@ -4,7 +4,6 @@ from typing import Union
|
|
| 4 |
|
| 5 |
from .simple_solver import SimpleSolver, SimpleSolverConfig
|
| 6 |
from .solver_base import BaseSolver, BaseSolverConfig, Solvers
|
| 7 |
-
from .structured_solver import StructuredSolver, StructuredSolverConfig
|
| 8 |
|
| 9 |
SolverConfigTypes = Union[*Solvers._solver_config_registry.values()]
|
| 10 |
SolverTypes = Union[*Solvers._solver_registry.values()]
|
|
@@ -15,8 +14,6 @@ __all__ = [
|
|
| 15 |
"BaseSolverConfig",
|
| 16 |
"SimpleSolver",
|
| 17 |
"SimpleSolverConfig",
|
| 18 |
-
"StructuredSolver",
|
| 19 |
-
"StructuredSolverConfig",
|
| 20 |
"SolverConfigTypes",
|
| 21 |
"SolverTypes",
|
| 22 |
"Solvers",
|
|
|
|
| 4 |
|
| 5 |
from .simple_solver import SimpleSolver, SimpleSolverConfig
|
| 6 |
from .solver_base import BaseSolver, BaseSolverConfig, Solvers
|
|
|
|
| 7 |
|
| 8 |
SolverConfigTypes = Union[*Solvers._solver_config_registry.values()]
|
| 9 |
SolverTypes = Union[*Solvers._solver_registry.values()]
|
|
|
|
| 14 |
"BaseSolverConfig",
|
| 15 |
"SimpleSolver",
|
| 16 |
"SimpleSolverConfig",
|
|
|
|
|
|
|
| 17 |
"SolverConfigTypes",
|
| 18 |
"SolverTypes",
|
| 19 |
"Solvers",
|
src/proxy_lite/solvers/structured_solver.py
DELETED
|
@@ -1,178 +0,0 @@
|
|
| 1 |
-
# ruff: noqa: E501
|
| 2 |
-
|
| 3 |
-
from functools import cached_property
|
| 4 |
-
from typing import Literal, Optional
|
| 5 |
-
|
| 6 |
-
from pydantic import BaseModel, Field
|
| 7 |
-
|
| 8 |
-
from proxy_lite.agents import AgentConfigTypes, Agents, BaseAgent
|
| 9 |
-
from proxy_lite.environments.environment_base import Action, Observation
|
| 10 |
-
from proxy_lite.history import (
|
| 11 |
-
MessageHistory,
|
| 12 |
-
MessageLabel,
|
| 13 |
-
SystemMessage,
|
| 14 |
-
)
|
| 15 |
-
from proxy_lite.tools import Tool
|
| 16 |
-
|
| 17 |
-
from .solver_base import BaseSolver, BaseSolverConfig, Solvers
|
| 18 |
-
|
| 19 |
-
WEB_TOOL_TURN = """The browser action has been attempted. Please double check if the action was successful."""
|
| 20 |
-
PLAN_USER_PROMPT = "First create a high-level plan to help solve the task on the web."
|
| 21 |
-
ACTION_PROMPT = """Now take the most-promising next action in the browser.
|
| 22 |
-
|
| 23 |
-
Only refer to the latest web elements from the latest screenshot.
|
| 24 |
-
|
| 25 |
-
Using mark ids from older turns will lead to errors as they are no longer valid.
|
| 26 |
-
|
| 27 |
-
Only interact with elements visible on the current webpage. Do not make up numbers or elements."""
|
| 28 |
-
REASONING_PROMPT = """You will now follow these steps.
|
| 29 |
-
|
| 30 |
-
1. **Make observations about the state of the webpage**:
|
| 31 |
-
- Consider the previous screenshot, your attempted previous action, and the current screenshot.
|
| 32 |
-
- Describe any changes you observe, and try to determine if the previous action succeeded.
|
| 33 |
-
- For example, if a form is being filled out, check whether the correct information is now displayed.
|
| 34 |
-
|
| 35 |
-
2. **Write down any helpful facts you have gathered**:
|
| 36 |
-
- Describe any useful information on the webpage that might be helpful for completing the task.
|
| 37 |
-
- For example, if you are viewing a document, you may wish to note down any information you want to refer back to later.
|
| 38 |
-
|
| 39 |
-
3. **Reason about the system's status**:
|
| 40 |
-
- Have you fully completed the task?
|
| 41 |
-
|
| 42 |
-
4. **Select one of the following statuses**:
|
| 43 |
-
- "complete": if the task has been completed.
|
| 44 |
-
- "continue": if you are ready to continue without information or help.
|
| 45 |
-
|
| 46 |
-
5. **Reason through next steps**:
|
| 47 |
-
- If the status is "continue", write down your reasoning for the next action you will take. You can only take one action at a time.
|
| 48 |
-
- If the status is not "continue", return an empty string.
|
| 49 |
-
|
| 50 |
-
6. **Write a message to the user**:
|
| 51 |
-
- If the status is "complete", write a message to the user. If they asked a question in the task, make sure the answer is here. Otherwise, just provide other useful information about how the task went or if there was a problem in completing it.
|
| 52 |
-
- If the status is not "complete", set this to an empty string.
|
| 53 |
-
|
| 54 |
-
Tips:
|
| 55 |
-
- If you have already provided a response, don't provide it again.
|
| 56 |
-
- If you notice you are repeating previous actions, you're likely stuck. Try something different."""
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
class Reflection(BaseModel):
|
| 60 |
-
observation: str = Field(
|
| 61 |
-
...,
|
| 62 |
-
description="Observation of the current browser state, including an assessment on the success of the last action (previous actions and observations are often wrong).",
|
| 63 |
-
)
|
| 64 |
-
fact_updates: list[str] = Field(
|
| 65 |
-
"",
|
| 66 |
-
description="List of new information relevant to the task that was found on the page, ignore input fields holding content you wrote.",
|
| 67 |
-
)
|
| 68 |
-
status_reasoning: str = Field(
|
| 69 |
-
...,
|
| 70 |
-
description="Reasoning about the current state of the task.",
|
| 71 |
-
)
|
| 72 |
-
status: Literal["complete", "continue"] = Field(
|
| 73 |
-
...,
|
| 74 |
-
description="Choose a system status based on your status reasoning.",
|
| 75 |
-
)
|
| 76 |
-
next_step_reasoning: str = Field(
|
| 77 |
-
...,
|
| 78 |
-
description='If status is "continue", reason through the next action you will be taking (do not repeat actions over and over). Otherwise set to "".',
|
| 79 |
-
)
|
| 80 |
-
ending_message: str = Field(
|
| 81 |
-
...,
|
| 82 |
-
description="If status is 'complete', write a message to the user. If they asked a question in the task, make sure the answer is here. Otherwise, just provide other useful information about how the task went or if there was a problem in completing it. If status is 'continue', set to ''.",
|
| 83 |
-
)
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
@Solvers.register_solver_config("structured")
|
| 87 |
-
class StructuredSolverConfig(BaseSolverConfig):
|
| 88 |
-
name: Literal["structured"] = "structured"
|
| 89 |
-
agent: AgentConfigTypes
|
| 90 |
-
start_with_plan: bool = True
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
@Solvers.register_solver("structured")
|
| 94 |
-
class StructuredSolver(BaseSolver):
|
| 95 |
-
task: Optional[str] = None
|
| 96 |
-
complete: bool = False
|
| 97 |
-
|
| 98 |
-
@cached_property
|
| 99 |
-
def tools(self) -> list[Tool]:
|
| 100 |
-
return self.env_tools
|
| 101 |
-
|
| 102 |
-
@cached_property
|
| 103 |
-
def local_tools(self) -> list[Tool]:
|
| 104 |
-
if self.sandbox:
|
| 105 |
-
return self.sandbox.tools
|
| 106 |
-
return []
|
| 107 |
-
|
| 108 |
-
@cached_property
|
| 109 |
-
def agent(self) -> BaseAgent:
|
| 110 |
-
self.logger.debug(f"Tools: {self.tools}")
|
| 111 |
-
return Agents.get(self.config.agent.name)(
|
| 112 |
-
config=self.config.agent,
|
| 113 |
-
env_tools=self.tools,
|
| 114 |
-
)
|
| 115 |
-
|
| 116 |
-
@property
|
| 117 |
-
def history(self) -> MessageHistory:
|
| 118 |
-
return MessageHistory(
|
| 119 |
-
messages=[SystemMessage.from_media(text=self.agent.system_prompt)] + self.agent.history.messages,
|
| 120 |
-
)
|
| 121 |
-
|
| 122 |
-
async def initialise(self, task: str, env_tools: list[Tool], env_info: str) -> None:
|
| 123 |
-
self.env_tools = env_tools
|
| 124 |
-
self.agent.receive_user_message(
|
| 125 |
-
text=env_info,
|
| 126 |
-
label=MessageLabel.USER_INPUT,
|
| 127 |
-
)
|
| 128 |
-
self.task = task
|
| 129 |
-
self.agent.receive_user_message(
|
| 130 |
-
text=f"Task: {task}",
|
| 131 |
-
label=MessageLabel.USER_INPUT,
|
| 132 |
-
)
|
| 133 |
-
if self.config.start_with_plan:
|
| 134 |
-
self.agent.receive_user_message(text=PLAN_USER_PROMPT, label=MessageLabel.PLAN)
|
| 135 |
-
await self.agent.generate_output(use_tool=False)
|
| 136 |
-
|
| 137 |
-
async def act(self, observation: Observation) -> Action:
|
| 138 |
-
if observation.state.tool_responses:
|
| 139 |
-
for tool_response in observation.state.tool_responses:
|
| 140 |
-
await self.agent.receive_tool_message(
|
| 141 |
-
text=f"{WEB_TOOL_TURN}\n{tool_response.content}",
|
| 142 |
-
tool_id=tool_response.id,
|
| 143 |
-
label=MessageLabel.TOOL_RESULT_INDUCTION,
|
| 144 |
-
)
|
| 145 |
-
|
| 146 |
-
self.agent.receive_user_message(
|
| 147 |
-
image=observation.state.image,
|
| 148 |
-
text=observation.state.text,
|
| 149 |
-
label=MessageLabel.SCREENSHOT,
|
| 150 |
-
is_base64=True,
|
| 151 |
-
)
|
| 152 |
-
|
| 153 |
-
self.agent.receive_user_message(
|
| 154 |
-
text=REASONING_PROMPT,
|
| 155 |
-
label=MessageLabel.REASONING_INDUCTION,
|
| 156 |
-
)
|
| 157 |
-
|
| 158 |
-
message = await self.agent.generate_structured_output(model=Reflection)
|
| 159 |
-
self.logger.info(f"🌐 [bold blue]Observation:[/] {message.observation}")
|
| 160 |
-
|
| 161 |
-
if message.status == "complete":
|
| 162 |
-
self.complete = True
|
| 163 |
-
return Action(tool_calls=[], text=message.ending_message)
|
| 164 |
-
|
| 165 |
-
next_step = message.next_step_reasoning
|
| 166 |
-
|
| 167 |
-
self.agent.receive_user_message(
|
| 168 |
-
text=ACTION_PROMPT,
|
| 169 |
-
label=MessageLabel.ACTION,
|
| 170 |
-
is_base64=True,
|
| 171 |
-
)
|
| 172 |
-
message = await self.agent.generate_output(use_tool=True)
|
| 173 |
-
|
| 174 |
-
return Action(tool_calls=message.tool_calls, text=next_step)
|
| 175 |
-
|
| 176 |
-
async def is_complete(self, observation: Observation) -> bool:
|
| 177 |
-
env_terminated = observation.terminated
|
| 178 |
-
return self.complete or env_terminated
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|