mgbam committed · commit c5abecc · verified · 1 Parent(s): 9b8ec81

Update utils.py

Files changed (1)
  1. utils.py +133 -61
utils.py CHANGED
@@ -3,101 +3,173 @@
 """
 A collection of utility functions for data manipulation and formatting.

-This module provides helpers for tasks like converting chat history formats,
-processing images for multimodal models, cleaning model outputs, and
-applying code modifications.
 """
 import base64
 import io
 import re
-from typing import Dict, List, Optional, Tuple

 import numpy as np
 from PIL import Image

 from config import SEARCH_START, DIVIDER, REPLACE_END, GRADIO_SUPPORTED_LANGUAGES

-# --- Type Definitions ---
 History = List[Tuple[Optional[str], Optional[str]]]
-Messages = List[Dict[str, any]]

-# --- History and Message Conversion ---

 def history_to_messages(history: History, system_prompt: str) -> Messages:
-    """Converts Gradio's history format to the list of messages format for an API call."""
-    messages = [{'role': 'system', 'content': system_prompt}]
     for user_msg, assistant_msg in history:
-        # Handle potential multimodal user message (which comes as a list)
         if isinstance(user_msg, list):
-            # Find the text part of the message for history
-            text_content = next((item.get("text", "") for item in user_msg if isinstance(item, dict) and item.get("type") == "text"), "")
-            messages.append({'role': 'user', 'content': text_content})
         elif user_msg:
-            messages.append({'role': 'user', 'content': user_msg})

         if assistant_msg:
-            messages.append({'role': 'assistant', 'content': assistant_msg})
     return messages

-def messages_to_history(messages: Messages) -> History:
-    """Converts a list of messages back to Gradio's history format."""
-    history = []
-    # Skip system message at index 0
-    for i in range(1, len(messages), 2):
-        user_msg = messages[i]['content']
-        assistant_msg = messages[i+1]['content'] if (i+1) < len(messages) else ""
-        history.append((user_msg, assistant_msg))
-    return history
-
-# --- Image Processing ---

 def process_image_for_model(image_data: np.ndarray) -> str:
-    """Converts a NumPy image array to a base64-encoded string."""
     pil_img = Image.fromarray(image_data)
     buffer = io.BytesIO()
     pil_img.save(buffer, format="PNG")
     img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
     return f"data:image/png;base64,{img_str}"

-# --- Code Manipulation ---
-
 def remove_code_block(text: str) -> str:
-    """Extracts code from a markdown-style code block."""
-    pattern = r'```(?:[a-zA-Z]+)?\n(.*?)\n```'
-    match = re.search(pattern, text, re.DOTALL)
-    if match:
-        return match.group(1).strip()
-    return text.strip()

-def apply_search_replace(original_code: str, change_block: str) -> str:
-    """Applies a single search-and-replace block to the code."""
-    try:
-        parts = re.split(f"^{DIVIDER}$", change_block, flags=re.MULTILINE)
-        if len(parts) != 2: return original_code  # Invalid block
-
-        search_part, replace_part = parts
-        search_content = search_part.replace(SEARCH_START, "").strip()
-        replace_content = replace_part.replace(REPLACE_END, "").strip()

-        # To insert, search block is empty or just contains the line before insertion
-        if not search_content:
-            # Inserting at the beginning
-            return replace_content + "\n" + original_code

-        if search_content in original_code:
-            return original_code.replace(search_content, replace_content)
         else:
-            # Handle insertion case where `search_content` is the line *before* insertion point
-            # and `replace_content` includes that line plus the new code.
-            # This is a common pattern LLMs use.
-            # We can simply return the original code, as more advanced logic is needed to reliably handle this.
-            print(f"Warning: Search block not found:\n---\n{search_content}\n---")
-            return original_code
-
-    except Exception as e:
-        print(f"Error applying changes: {e}")
-        return original_code

 def get_gradio_language(language: str) -> Optional[str]:
-    """Returns the language name if supported by Gradio, otherwise None."""
     return language if language in GRADIO_SUPPORTED_LANGUAGES else None
 
@@ -3,101 +3,173 @@
 """
 A collection of utility functions for data manipulation and formatting.

+This module provides helpers for tasks such as:
+- Converting between different chat history formats (internal state vs. API vs. UI).
+- Processing images for multimodal language models.
+- Extracting and modifying code based on specific patterns.
+- Validating language support for UI components.
 """
 import base64
 import io
 import re
+import logging
+from typing import Dict, List, Optional, Tuple, Any

 import numpy as np
 from PIL import Image

 from config import SEARCH_START, DIVIDER, REPLACE_END, GRADIO_SUPPORTED_LANGUAGES

+# --- Type Aliases for Clarity ---
+# Internal history format: a list of (user, assistant) tuples.
 History = List[Tuple[Optional[str], Optional[str]]]
+# API/Gradio message format: a list of OpenAI-style dictionaries.
+Messages = List[Dict[str, Any]]

+
+# --------------------------------------------------------------------------
+# 1. HISTORY & MESSAGE CONVERSION
+# --------------------------------------------------------------------------

 def history_to_messages(history: History, system_prompt: str) -> Messages:
+    """
+    Converts the internal history (list of tuples) to the API message format.
+
+    This format is required for making calls to the LLM API and includes the
+    system prompt at the beginning.
+
+    Args:
+        history: The conversation history as a list of (user, assistant) tuples.
+        system_prompt: The initial system prompt to guide the model.
+
+    Returns:
+        A list of message dictionaries in the format expected by the API.
+    """
+    messages: Messages = [{'role': 'system', 'content': system_prompt}]
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({'role': 'user', 'content': user_msg})
+        if assistant_msg:
+            messages.append({'role': 'assistant', 'content': assistant_msg})
+    return messages
+
+def history_to_chatbot_messages(history: History) -> Messages:
+    """
+    Converts the internal history (list of tuples) to the Gradio Chatbot format.
+
+    The modern `gr.Chatbot` component with `type="messages"` expects a list of
+    dictionaries, excluding the system prompt.
+
+    Args:
+        history: The conversation history as a list of (user, assistant) tuples.
+
+    Returns:
+        A list of message dictionaries for display in the Gradio Chatbot UI.
+    """
+    messages: Messages = []
     for user_msg, assistant_msg in history:
+        # For display, we only care about the text part of a multimodal message
         if isinstance(user_msg, list):
+            display_text = next((item.get("text", "") for item in user_msg if isinstance(item, dict) and item.get("type") == "text"), "")
+            messages.append({"role": "user", "content": display_text})
         elif user_msg:
+            messages.append({"role": "user", "content": user_msg})

         if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
     return messages

+# --------------------------------------------------------------------------
+# 2. CONTENT & CODE PROCESSING
+# --------------------------------------------------------------------------

 def process_image_for_model(image_data: np.ndarray) -> str:
+    """
+    Converts a NumPy image array from Gradio into a base64-encoded data URI.
+
+    Args:
+        image_data: The image as a NumPy array.
+
+    Returns:
+        A base64-encoded string formatted as a data URI for multimodal models.
+    """
     pil_img = Image.fromarray(image_data)
     buffer = io.BytesIO()
     pil_img.save(buffer, format="PNG")
     img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
     return f"data:image/png;base64,{img_str}"

 def remove_code_block(text: str) -> str:
+    """
+    Extracts code from a markdown-style code block.
+
+    This function robustly handles code blocks with or without language
+    specifiers and with varying whitespace.
+
+    Args:
+        text: The raw string from the model, potentially containing a code block.
+
+    Returns:
+        The extracted code, or the original text if no block is found.
+    """
+    pattern = r'```[a-zA-Z]*\s*\n?(.*?)\n?```'
+    match = re.search(pattern, text, re.DOTALL)
+    if match:
+        return match.group(1).strip()
+    return text.strip()  # Fallback for when no code block is detected
+
+def apply_search_replace_changes(original_code: str, changes_text: str) -> str:
+    """
+    Applies one or more SEARCH/REPLACE blocks to the original code.
+
+    This function iterates through all search/replace blocks in the given
+    `changes_text` and applies them sequentially to the `original_code`.
+
+    Args:
+        original_code: The starting code to be modified.
+        changes_text: A string containing one or more formatted change blocks.
+
+    Returns:
+        The modified code after all changes have been applied.
+    """
+    modified_code = original_code
+
+    # Define the pattern to find all SEARCH/REPLACE blocks
+    block_pattern = re.compile(
+        rf"^{SEARCH_START}\n(.*?)\n^{DIVIDER}\n(.*?)\n^{REPLACE_END}",
+        re.DOTALL | re.MULTILINE
+    )
+
+    for match in block_pattern.finditer(changes_text):
+        search_content = match.group(1)
+        replace_content = match.group(2)
+
+        if search_content in modified_code:
+            modified_code = modified_code.replace(search_content, replace_content, 1)
         else:
+            # Handle insertion case: if search block is empty, prepend.
+            if not search_content.strip():
+                modified_code = replace_content + "\n" + modified_code
+            else:
+                logging.warning(
+                    f"Search block not found in the code. Skipping this change.\n"
+                    f"--- BLOCK NOT FOUND ---\n{search_content}\n-----------------------"
+                )
+
+    return modified_code
+
+# --------------------------------------------------------------------------
+# 3. UI HELPERS
+# --------------------------------------------------------------------------

 def get_gradio_language(language: str) -> Optional[str]:
+    """
+    Returns the language name if it is supported for syntax highlighting by Gradio.
+
+    Args:
+        language: The language identifier (e.g., "python", "html").
+
+    Returns:
+        The language string if supported, otherwise None.
+    """
    return language if language in GRADIO_SUPPORTED_LANGUAGES else None
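
A minimal usage sketch of the two history converters introduced above, assuming the module is importable as utils; the sample conversation values are illustrative only and not part of the commit.

from utils import history_to_messages, history_to_chatbot_messages

# Hypothetical conversation state in the internal (user, assistant) tuple format.
history = [("Hi", "Hello! How can I help?"), ("Write a snake game", None)]

# API-bound messages: the system prompt comes first, then the user/assistant turns.
api_messages = history_to_messages(history, system_prompt="You are a helpful coding assistant.")
# -> [{'role': 'system', ...}, {'role': 'user', 'content': 'Hi'},
#     {'role': 'assistant', 'content': 'Hello! How can I help?'},
#     {'role': 'user', 'content': 'Write a snake game'}]

# UI-bound messages: the same turns without the system entry,
# suitable for gr.Chatbot(type="messages").
chatbot_messages = history_to_chatbot_messages(history)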
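
Likewise, a small sketch of how the reworked remove_code_block behaves on a typical model reply; the strings are made up for the example.

from utils import remove_code_block

reply = "Here is the fix:\n```python\nprint('hi')\n```\nThanks!"
print(remove_code_block(reply))             # -> print('hi')

# Without a fenced block, the input is simply returned stripped.
print(remove_code_block("  plain text  "))  # -> plain text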
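
Finally, a hedged sketch of driving apply_search_replace_changes. The literal SEARCH_START / DIVIDER / REPLACE_END marker strings live in config.py and are not shown in this commit; the conflict-style markers below are an assumption used purely for illustration.

from utils import apply_search_replace_changes

original = "def greet():\n    print('hello')\n"

# Assumed marker values; substitute whatever config.py actually defines.
changes = (
    "<<<<<<< SEARCH\n"
    "    print('hello')\n"
    "=======\n"
    "    print('hello, world')\n"
    ">>>>>>> REPLACE"
)

patched = apply_search_replace_changes(original, changes)
# If the markers match config.py, `patched` now prints 'hello, world';
# an unmatched search block is skipped with a logging warning instead of raising.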