"""
A collection of utility functions for data manipulation and formatting.

This module provides helpers for tasks such as:

- Converting between different chat history formats (internal state vs. API vs. UI).
- Processing images for multimodal language models.
- Extracting and modifying code based on specific patterns.
- Validating language support for UI components.
"""

import base64
import io
import re
import logging
from typing import Dict, List, Optional, Tuple, Any

import numpy as np
from PIL import Image

from config import SEARCH_START, DIVIDER, REPLACE_END, GRADIO_SUPPORTED_LANGUAGES


History = List[Tuple[Optional[str], Optional[str]]]
Messages = List[Dict[str, Any]]
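
# Illustrative shapes for the two aliases above (values are hypothetical):
#   History:  [("Hello", "Hi there!"), ("Thanks", None)]
#   Messages: [{"role": "user", "content": "Hello"},
#              {"role": "assistant", "content": "Hi there!"}]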


def history_to_messages(history: History, system_prompt: str) -> Messages:
    """
    Converts the internal history (list of tuples) to the API message format.

    This format is required for making calls to the LLM API and includes the
    system prompt at the beginning.

    Args:
        history: The conversation history as a list of (user, assistant) tuples.
        system_prompt: The initial system prompt to guide the model.

    Returns:
        A list of message dictionaries in the format expected by the API.
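
    Example (illustrative; the history and prompt below are made up):
        >>> history_to_messages([("Hi", "Hello!")], "Be brief.")  # doctest: +NORMALIZE_WHITESPACE
        [{'role': 'system', 'content': 'Be brief.'},
         {'role': 'user', 'content': 'Hi'},
         {'role': 'assistant', 'content': 'Hello!'}]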
    """
    messages: Messages = [{'role': 'system', 'content': system_prompt}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({'role': 'user', 'content': user_msg})
        if assistant_msg:
            messages.append({'role': 'assistant', 'content': assistant_msg})
    return messages


def history_to_chatbot_messages(history: History) -> Messages:
    """
    Converts the internal history (list of tuples) to the Gradio Chatbot format.

    The modern `gr.Chatbot` component with `type="messages"` expects a list of
    dictionaries, excluding the system prompt.

    Args:
        history: The conversation history as a list of (user, assistant) tuples.

    Returns:
        A list of message dictionaries for display in the Gradio Chatbot UI.
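
    Example (illustrative; shows how a multimodal user turn, given as a list
    of content parts, is reduced to its text part for display):
        >>> turn = ([{"type": "text", "text": "Describe this"}], "A cat.")
        >>> history_to_chatbot_messages([turn])  # doctest: +NORMALIZE_WHITESPACE
        [{'role': 'user', 'content': 'Describe this'},
         {'role': 'assistant', 'content': 'A cat.'}]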
    """
    messages: Messages = []
    for user_msg, assistant_msg in history:
        # Multimodal user turns arrive as a list of content parts; only the
        # text part is shown in the chatbot UI.
        if isinstance(user_msg, list):
            display_text = next(
                (item.get("text", "") for item in user_msg
                 if isinstance(item, dict) and item.get("type") == "text"),
                "",
            )
            messages.append({"role": "user", "content": display_text})
        elif user_msg:
            messages.append({"role": "user", "content": user_msg})

        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages


def process_image_for_model(image_data: np.ndarray) -> str:
    """
    Converts a NumPy image array from Gradio into a base64-encoded data URI.

    Args:
        image_data: The image as a NumPy array.

    Returns:
        A base64-encoded string formatted as a data URI for multimodal models.
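
    Example (illustrative; encodes a tiny all-black RGB image):
        >>> uri = process_image_for_model(np.zeros((2, 2, 3), dtype=np.uint8))
        >>> uri[:22]
        'data:image/png;base64,'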
    """
    pil_img = Image.fromarray(image_data)
    buffer = io.BytesIO()
    pil_img.save(buffer, format="PNG")
    img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{img_str}"


def remove_code_block(text: str) -> str:
    """
    Extracts code from a markdown-style code block.

    This function robustly handles code blocks with or without language
    specifiers and with varying whitespace.

    Args:
        text: The raw string from the model, potentially containing a code block.

    Returns:
        The extracted code, or the original text if no block is found.
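
    Example (illustrative; chr(10) builds the newlines so this non-raw
    docstring needs no escape sequences):
        >>> fenced = chr(10).join(["```python", "answer = 42", "```"])
        >>> remove_code_block(fenced)
        'answer = 42'
        >>> remove_code_block("no code block here")
        'no code block here'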
    """
    pattern = r'```[a-zA-Z]*\s*\n?(.*?)\n?```'
    match = re.search(pattern, text, re.DOTALL)
    if match:
        return match.group(1).strip()
    return text.strip()


def apply_search_replace_changes(original_code: str, changes_text: str) -> str:
    """
    Applies one or more SEARCH/REPLACE blocks to the original code.

    This function iterates through all search/replace blocks in the given
    `changes_text` and applies them sequentially to the `original_code`.

    Args:
        original_code: The starting code to be modified.
        changes_text: A string containing one or more formatted change blocks.

    Returns:
        The modified code after all changes have been applied.
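
    Example (illustrative sketch; assumes the marker strings from config are
    single-line and contain no regex metacharacters, e.g. the usual
    "<<<<<<< SEARCH" / "=======" / ">>>>>>> REPLACE" convention):
        >>> block = chr(10).join([SEARCH_START, "x = 1", DIVIDER, "x = 2", REPLACE_END])
        >>> apply_search_replace_changes("x = 1", block)
        'x = 2'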
    """
    modified_code = original_code

    # Matches one SEARCH/REPLACE block: the search text, then the divider,
    # then the replacement text.
    block_pattern = re.compile(
        rf"^{SEARCH_START}\n(.*?)\n^{DIVIDER}\n(.*?)\n^{REPLACE_END}",
        re.DOTALL | re.MULTILINE
    )

    for match in block_pattern.finditer(changes_text):
        search_content = match.group(1)
        replace_content = match.group(2)

        if search_content in modified_code:
            modified_code = modified_code.replace(search_content, replace_content, 1)
        else:
            # An empty search block means the replacement is new code to be
            # prepended rather than a substitution.
            if not search_content.strip():
                modified_code = replace_content + "\n" + modified_code
            else:
                logging.warning(
                    f"Search block not found in the code. Skipping this change.\n"
                    f"--- BLOCK NOT FOUND ---\n{search_content}\n-----------------------"
                )

    return modified_code


def get_gradio_language(language: str) -> Optional[str]:
    """
    Returns the language name if it is supported for syntax highlighting by Gradio.

    Args:
        language: The language identifier (e.g., "python", "html").

    Returns:
        The language string if supported, otherwise None.
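
    Example (illustrative; assumes "python" appears in GRADIO_SUPPORTED_LANGUAGES
    and "not-a-language" does not):
        >>> get_gradio_language("python")
        'python'
        >>> get_gradio_language("not-a-language") is None
        True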
    """
    return language if language in GRADIO_SUPPORTED_LANGUAGES else None