XanderJC committed on
Commit
0202a68
·
1 Parent(s): 853581f
src/proxy_lite/cli.py CHANGED
@@ -6,6 +6,7 @@ from pathlib import Path
6
  from typing import Optional
7
 
8
  from proxy_lite import Runner, RunnerConfig
 
9
  from proxy_lite.logger import logger
10
 
11
 
@@ -46,6 +47,12 @@ def do_command(args):
46
  f.write(base64.b64decode(final_screenshot))
47
  logger.info(f"🤖 Screenshot saved to {path}")
48
 
 
 
 
 
 
 
49
 
50
  def main():
51
  parser = argparse.ArgumentParser(description="Proxy-Lite")
 
6
  from typing import Optional
7
 
8
  from proxy_lite import Runner, RunnerConfig
9
+ from proxy_lite.gif_marker import create_run_gif
10
  from proxy_lite.logger import logger
11
 
12
 
 
47
  f.write(base64.b64decode(final_screenshot))
48
  logger.info(f"🤖 Screenshot saved to {path}")
49
 
50
+ gif_folder_path = Path(__file__).parent.parent.parent / "gifs"
51
+ gif_folder_path.mkdir(parents=True, exist_ok=True)
52
+ gif_path = gif_folder_path / f"{result.run_id}.gif"
53
+ create_run_gif(result, gif_path, duration=1500)
54
+ logger.info(f"🤖 GIF saved to {gif_path}")
55
+
56
 
57
  def main():
58
  parser = argparse.ArgumentParser(description="Proxy-Lite")
src/proxy_lite/configs/default.yaml CHANGED
@@ -18,4 +18,6 @@ solver:
18
  api_base: https://convergence-ai-demo-api.hf.space/v1
19
  local_view: true
20
  task_timeout: 1800
 
 
21
  verbose: true
 
18
  api_base: https://convergence-ai-demo-api.hf.space/v1
19
  local_view: true
20
  task_timeout: 1800
21
+ environment_timeout: 1800
22
+ action_timeout: 1800
23
  verbose: true
src/proxy_lite/gif_marker.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import re
3
+ import textwrap
4
+ from io import BytesIO
5
+
6
+ from PIL import Image, ImageDraw, ImageFont
7
+
8
+ from proxy_lite.environments.environment_base import Action, Observation
9
+ from proxy_lite.recorder import Run
10
+
11
+
12
def create_run_gif(
    run: Run, output_path: str, white_panel_width: int = 300, duration: int = 2000, resize_factor: int = 4
) -> None:
    """
    Generate an animated GIF from the Run object's history.

    For each Observation record that carries an image, the image is decoded
    from its base64 string and scaled down by ``resize_factor``. If the record
    immediately following it is an Action, that action's text is drawn onto a
    white panel. The image and the panel are concatenated horizontally to
    produce one frame. Records that are not Observations, and Observations
    without an image, produce no frame.

    Parameters:
        run (Run): A Run object whose history contains Observation and Action records.
        output_path (str): Where the GIF is written. The value is handed
            straight to ``Image.save``; the CLI passes a ``pathlib.Path`` here.
        white_panel_width (int): Width in pixels of the white text panel (default 300).
        duration (int): Display time of each frame in milliseconds (default 2000).
        resize_factor (int): Integer divisor applied to both image dimensions
            (default 4, i.e. a quarter of the original width and height).

    Raises:
        ValueError: If no frame could be generated from the history.
    """
    history = run.history
    # The default font never changes between frames, so load it once.
    font = ImageFont.load_default()
    frames = []

    for index, record in enumerate(history):
        if not isinstance(record, Observation):
            continue
        image_data = record.state.image
        if not image_data:
            continue

        obs_img = Image.open(BytesIO(base64.b64decode(image_data))).convert("RGB")
        # Clamp to at least one pixel so tiny screenshots cannot collapse to a
        # zero-sized image when divided by resize_factor.
        obs_img = obs_img.resize(
            (max(1, obs_img.width // resize_factor), max(1, obs_img.height // resize_factor))
        )

        # Only the record directly after the observation is considered its action.
        action_text = ""
        if index + 1 < len(history) and isinstance(history[index + 1], Action):
            action_text = history[index + 1].text or ""

        panel = _render_text_panel(_format_action_text(action_text), white_panel_width, obs_img.height, font)

        combined_frame = Image.new("RGB", (obs_img.width + white_panel_width, obs_img.height))
        combined_frame.paste(obs_img, (0, 0))
        combined_frame.paste(panel, (obs_img.width, 0))
        frames.append(combined_frame)

    if not frames:
        raise ValueError("No frames were generated from the Run object's history.")
    frames[0].save(output_path, save_all=True, append_images=frames[1:], duration=duration, loop=0)


def _extract_tag(text: str, tag: str) -> str | None:
    """Return the stripped text between ``<tag>`` and ``</tag>``, or None if the tag pair is absent."""
    match = re.search(rf"<{tag}>(.*?)</{tag}>", text, re.DOTALL)
    return match.group(1).strip() if match else None


def _format_action_text(action_text: str, max_chars_per_line: int = 40) -> str:
    """Build the wrapped panel text, keeping the blank line between paragraphs."""
    observation_content = _extract_tag(action_text, "observation")
    thinking_content = _extract_tag(action_text, "thinking")
    if observation_content and thinking_content:
        action_text = f"Observation: {observation_content}\n\nThinking: {thinking_content}"

    # textwrap.fill replaces every whitespace run (including "\n\n") with a
    # single space, so wrap paragraph by paragraph and re-join with blank lines.
    paragraphs = [p for p in re.split(r"\n\s*\n", action_text) if p.strip()]
    return "\n\n".join(textwrap.fill(p, width=max_chars_per_line) for p in paragraphs)


def _render_text_panel(text: str, width: int, height: int, font):
    """Draw ``text`` in black, centred where it fits, on a white ``width`` x ``height`` panel."""
    panel = Image.new("RGB", (width, height), "white")
    draw = ImageDraw.Draw(panel)

    try:
        bbox = draw.multiline_textbbox((0, 0), text, font=font)
        text_width, text_height = bbox[2] - bbox[0], bbox[3] - bbox[1]
    except AttributeError:
        # Fallback for older Pillow versions without multiline_textbbox:
        # measure each line individually.
        lines = text.splitlines() or [text]
        line_sizes = [draw.textsize(line, font=font) for line in lines]
        text_width = max(line_width for line_width, _ in line_sizes)
        text_height = sum(line_height for _, line_height in line_sizes)

    # Clamp to the panel origin: when the text is larger than the panel, a
    # negative offset would cut off the beginning of the text.
    text_x = max(0, (width - text_width) // 2)
    text_y = max(0, (height - text_height) // 2)
    draw.multiline_text((text_x, text_y), text, fill="black", font=font, align="center")
    return panel
110
+
111
+
112
# Example usage: render a GIF for one trajectory loaded through Run.load.
if __name__ == "__main__":
    # Demonstration only: swap the hard-coded run id below for one of your own runs.
    from proxy_lite.recorder import Run

    example_run = Run.load("0abdb4cb-f289-48b0-ba13-35ed1210f7c1")

    # Half the history length is reported as the step count.
    step_count = int(len(example_run.history) / 2)
    print(f"Number of steps: {step_count}")

    gif_destination = "trajectory.gif"
    create_run_gif(example_run, gif_destination, duration=1000)
    print(f"Trajectory GIF saved to {gif_destination}")
src/proxy_lite/recorder.py CHANGED
@@ -38,6 +38,11 @@ class Run(BaseModel):
38
  created_at=str(datetime.datetime.now(datetime.UTC)),
39
  )
40
 
 
 
 
 
 
41
  @property
42
  def observations(self) -> list[Observation]:
43
  return [h for h in self.history if isinstance(h, Observation)]
 
38
  created_at=str(datetime.datetime.now(datetime.UTC)),
39
  )
40
 
41
@classmethod
def load(cls, run_id: str) -> Self:
    """
    Build an instance from the JSON file ``local_trajectories/<run_id>.json``.

    The ``local_trajectories`` directory is resolved three levels above this
    module's file. The parsed JSON object is passed to the class constructor
    as keyword arguments.

    Parameters:
        run_id (str): Identifier used as the JSON file's base name.

    Raises:
        FileNotFoundError: If no file exists for ``run_id``.
    """
    trajectory_path = Path(__file__).parent.parent.parent / "local_trajectories" / f"{run_id}.json"
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale-dependent default encoding.
    with open(trajectory_path, encoding="utf-8") as f:
        return cls(**json.load(f))
45
+
46
  @property
47
  def observations(self) -> list[Observation]:
48
  return [h for h in self.history if isinstance(h, Observation)]