Spaces:
Running
Running
gif maker
Browse files- src/proxy_lite/cli.py +7 -0
- src/proxy_lite/configs/default.yaml +2 -0
- src/proxy_lite/gif_marker.py +124 -0
- src/proxy_lite/recorder.py +5 -0
src/proxy_lite/cli.py
CHANGED
@@ -6,6 +6,7 @@ from pathlib import Path
|
|
6 |
from typing import Optional
|
7 |
|
8 |
from proxy_lite import Runner, RunnerConfig
|
|
|
9 |
from proxy_lite.logger import logger
|
10 |
|
11 |
|
@@ -46,6 +47,12 @@ def do_command(args):
|
|
46 |
f.write(base64.b64decode(final_screenshot))
|
47 |
logger.info(f"🤖 Screenshot saved to {path}")
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
def main():
|
51 |
parser = argparse.ArgumentParser(description="Proxy-Lite")
|
|
|
6 |
from typing import Optional
|
7 |
|
8 |
from proxy_lite import Runner, RunnerConfig
|
9 |
+
from proxy_lite.gif_marker import create_run_gif
|
10 |
from proxy_lite.logger import logger
|
11 |
|
12 |
|
|
|
47 |
f.write(base64.b64decode(final_screenshot))
|
48 |
logger.info(f"🤖 Screenshot saved to {path}")
|
49 |
|
50 |
+
gif_folder_path = Path(__file__).parent.parent.parent / "gifs"
|
51 |
+
gif_folder_path.mkdir(parents=True, exist_ok=True)
|
52 |
+
gif_path = gif_folder_path / f"{result.run_id}.gif"
|
53 |
+
create_run_gif(result, gif_path, duration=1500)
|
54 |
+
logger.info(f"🤖 GIF saved to {gif_path}")
|
55 |
+
|
56 |
|
57 |
def main():
|
58 |
parser = argparse.ArgumentParser(description="Proxy-Lite")
|
src/proxy_lite/configs/default.yaml
CHANGED
@@ -18,4 +18,6 @@ solver:
|
|
18 |
api_base: https://convergence-ai-demo-api.hf.space/v1
|
19 |
local_view: true
|
20 |
task_timeout: 1800
|
|
|
|
|
21 |
verbose: true
|
|
|
18 |
api_base: https://convergence-ai-demo-api.hf.space/v1
|
19 |
local_view: true
|
20 |
task_timeout: 1800
|
21 |
+
environment_timeout: 1800
|
22 |
+
action_timeout: 1800
|
23 |
verbose: true
|
src/proxy_lite/gif_marker.py
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import re
|
3 |
+
import textwrap
|
4 |
+
from io import BytesIO
|
5 |
+
|
6 |
+
from PIL import Image, ImageDraw, ImageFont
|
7 |
+
|
8 |
+
from proxy_lite.environments.environment_base import Action, Observation
|
9 |
+
from proxy_lite.recorder import Run
|
10 |
+
|
11 |
+
|
12 |
+
# Patterns for the tagged sections an action's text may contain. Compiled once
# at import time instead of being re-parsed for every frame.
_OBSERVATION_TAG_RE = re.compile(r"<observation>(.*?)</observation>", re.DOTALL)
_THINKING_TAG_RE = re.compile(r"<thinking>(.*?)</thinking>", re.DOTALL)

# Maximum number of characters per caption line on the white panel.
_PANEL_CHARS_PER_LINE = 40


def _caption_from_action_text(action_text: str) -> str:
    """Condense an action's text to its observation/thinking sections.

    When both an ``<observation>`` and a ``<thinking>`` section are present and
    non-empty, only those two are kept; otherwise the text is returned unchanged.
    """
    observation_match = _OBSERVATION_TAG_RE.search(action_text)
    thinking_match = _THINKING_TAG_RE.search(action_text)
    observation_content = observation_match.group(1).strip() if observation_match else None
    thinking_content = thinking_match.group(1).strip() if thinking_match else None

    if observation_content and thinking_content:
        return f"Observation: {observation_content}\n\nThinking: {thinking_content}"
    return action_text


def _wrap_caption(caption: str, width: int = _PANEL_CHARS_PER_LINE) -> str:
    """Wrap ``caption`` to ``width`` characters while keeping its line breaks.

    ``textwrap.fill`` treats its whole input as a single paragraph and turns
    every newline into a space, which would erase the blank line separating the
    observation from the thinking section. Wrapping line by line keeps it.
    """
    return "\n".join(textwrap.fill(line, width=width) for line in caption.splitlines())


def _render_caption_panel(caption: str, panel_width: int, panel_height: int) -> "Image.Image":
    """Return a white ``panel_width`` x ``panel_height`` image with ``caption`` centred on it."""
    panel = Image.new("RGB", (panel_width, panel_height), "white")
    draw = ImageDraw.Draw(panel)
    font = ImageFont.load_default()
    wrapped_text = _wrap_caption(caption)

    try:
        # Preferred API: returns the (left, top, right, bottom) bounding box.
        bbox = draw.multiline_textbbox((0, 0), wrapped_text, font=font)
        text_width, text_height = bbox[2] - bbox[0], bbox[3] - bbox[1]
    except AttributeError:
        # Fallback for older Pillow versions: measure each line separately.
        lines = wrapped_text.splitlines() or [wrapped_text]
        line_sizes = [draw.textsize(line, font=font) for line in lines]
        text_width = max(width for width, _ in line_sizes)
        text_height = sum(height for _, height in line_sizes)

    # Clamp to the panel origin: a caption larger than the panel would otherwise
    # get a negative offset and lose its first lines / leading characters.
    text_x = max(0, (panel_width - text_width) // 2)
    text_y = max(0, (panel_height - text_height) // 2)
    draw.multiline_text((text_x, text_y), wrapped_text, fill="black", font=font, align="center")
    return panel


def create_run_gif(
    run: "Run", output_path: str, white_panel_width: int = 300, duration: int = 2000, resize_factor: int = 4
) -> None:
    """
    Generate a GIF from the Run object's history.

    For each Observation record that carries an image, the image is decoded from
    its base64 encoded string and scaled down by ``resize_factor``. If the record
    immediately following it is an Action with text, that text (condensed to its
    ``<observation>``/``<thinking>`` sections when both are present) is drawn onto
    a white panel. The observation image and the white panel are then
    concatenated horizontally to produce one frame.

    Parameters:
        run (Run): A Run object whose history contains Observation and Action records.
        output_path (str): Where the GIF is written. The value is handed straight to
            Pillow's ``Image.save``, so a path-like object works as well.
        white_panel_width (int): Width in pixels of the white caption panel (default 300).
        duration (int): Display time of each frame in milliseconds (default 2000).
        resize_factor (int): Factor by which each observation image is scaled down
            (default 4). Must be at least 1.

    Raises:
        ValueError: If ``resize_factor`` is smaller than 1, or if the history
            yields no frame (no Observation with image data).
    """
    if resize_factor < 1:
        raise ValueError(f"resize_factor must be a positive integer, got {resize_factor!r}.")

    history = run.history
    frames = []
    for index, record in enumerate(history):
        # Only observations produce frames; actions are consumed as captions of
        # the observation that precedes them.
        if not isinstance(record, Observation):
            continue
        image_data = record.state.image
        if not image_data:
            continue

        # Decode the base64 image and scale it down by resize_factor. The
        # max(1, ...) guard keeps very small screenshots from collapsing to a
        # zero-sized image, which Pillow refuses to create.
        obs_img = Image.open(BytesIO(base64.b64decode(image_data))).convert("RGB")
        obs_img = obs_img.resize(
            (max(1, obs_img.width // resize_factor), max(1, obs_img.height // resize_factor))
        )

        # Use the text of the directly following Action, if there is one.
        action_text = ""
        next_record = history[index + 1] if index + 1 < len(history) else None
        if isinstance(next_record, Action) and next_record.text:
            action_text = next_record.text

        # White caption panel with the same height as the observation image.
        panel = _render_caption_panel(
            _caption_from_action_text(action_text), white_panel_width, obs_img.height
        )

        # Concatenate observation image (left) and caption panel (right).
        combined_frame = Image.new("RGB", (obs_img.width + white_panel_width, obs_img.height))
        combined_frame.paste(obs_img, (0, 0))
        combined_frame.paste(panel, (obs_img.width, 0))
        frames.append(combined_frame)

    if not frames:
        raise ValueError("No frames were generated from the Run object's history.")

    # loop=0 makes the GIF repeat forever.
    frames[0].save(output_path, save_all=True, append_images=frames[1:], duration=duration, loop=0)
|
110 |
+
|
111 |
+
|
112 |
+
# Example usage:
#     python gif_marker.py [RUN_ID]
if __name__ == "__main__":
    import sys

    # Simple demonstration: load a recorded run by id and render it as a GIF.
    # The run id may be given as the first command-line argument; without one
    # the sample id below is used.
    run_id = sys.argv[1] if len(sys.argv) > 1 else "0abdb4cb-f289-48b0-ba13-35ed1210f7c1"
    dummy_run = Run.load(run_id)

    # Half the history length, rounded down (presumably one observation plus
    # one action per step -- confirm against the recorder).
    num_steps = len(dummy_run.history) // 2
    print(f"Number of steps: {num_steps}")
    output_gif_path = "trajectory.gif"
    create_run_gif(dummy_run, output_gif_path, duration=1000)
    print(f"Trajectory GIF saved to {output_gif_path}")
|
src/proxy_lite/recorder.py
CHANGED
@@ -38,6 +38,11 @@ class Run(BaseModel):
|
|
38 |
created_at=str(datetime.datetime.now(datetime.UTC)),
|
39 |
)
|
40 |
|
|
|
|
|
|
|
|
|
|
|
41 |
@property
|
42 |
def observations(self) -> list[Observation]:
|
43 |
return [h for h in self.history if isinstance(h, Observation)]
|
|
|
38 |
created_at=str(datetime.datetime.now(datetime.UTC)),
|
39 |
)
|
40 |
|
41 |
+
@classmethod
def load(cls, run_id: str) -> Self:
    """Build an instance from the JSON file stored for ``run_id``.

    The file is looked up as ``local_trajectories/<run_id>.json`` three
    directory levels above this module, and its top-level JSON object is
    passed to the class constructor as keyword arguments.

    Raises:
        FileNotFoundError: If no file exists for ``run_id``.
    """
    trajectory_path = Path(__file__).parent.parent.parent / "local_trajectories" / f"{run_id}.json"
    # Explicit encoding: without it the platform's locale encoding is used,
    # which can fail on non-ASCII content on some systems.
    with open(trajectory_path, "r", encoding="utf-8") as f:
        return cls(**json.load(f))
|
45 |
+
|
46 |
@property
def observations(self) -> list[Observation]:
    """Return the Observation records of ``self.history``, in history order."""
    found = []
    for record in self.history:
        if isinstance(record, Observation):
            found.append(record)
    return found
|