Spaces:

mgbam
/

builder

Running

App Files Files Community

builder / utils.py

mgbam

Create utils.py

7833311 verified about 1 month ago

raw

history blame

4.19 kB

	# /utils.py

	"""
	A collection of utility functions for data manipulation and formatting.

	This module provides helpers for tasks like converting chat history formats,
	processing images for multimodal models, cleaning model outputs, and
	applying code modifications.
	"""
	import base64
	import io
	import re
	from typing import Any, Dict, List, Optional, Tuple

	import numpy as np
	from PIL import Image

	from config import SEARCH_START, DIVIDER, REPLACE_END, GRADIO_SUPPORTED_LANGUAGES

	# --- Type Definitions ---
	# Gradio-style chat history: one (user_message, assistant_message) pair per
	# turn; either side of a pair may be None.
	History = List[Tuple[Optional[str], Optional[str]]]
	# API-style messages: dicts such as {'role': ..., 'content': ...}.
	# NOTE: the value type is typing.Any, not the builtin function `any`.
	Messages = List[Dict[str, Any]]

	# --- History and Message Conversion ---

	def history_to_messages(history: History, system_prompt: str) -> Messages:
	    """Convert Gradio's history format to a list of messages for an API call.

	    Args:
	        history: Iterable of ``(user_msg, assistant_msg)`` pairs. A user
	            message may also be a list of multimodal parts; in that case
	            only the first ``{"type": "text"}`` part is kept (an empty
	            string if there is none).
	        system_prompt: Content of the leading ``system`` message.

	    Returns:
	        A list that starts with the system message, followed by the
	        ``user`` / ``assistant`` messages in history order. Falsy
	        (``None`` or empty) string messages are omitted.
	    """
	    messages = [{'role': 'system', 'content': system_prompt}]
	    for user_msg, assistant_msg in history:
	        if isinstance(user_msg, list):
	            # Multimodal user message: keep only its text part for history.
	            text_content = next(
	                (
	                    item.get("text", "")
	                    for item in user_msg
	                    if isinstance(item, dict) and item.get("type") == "text"
	                ),
	                "",
	            )
	            messages.append({'role': 'user', 'content': text_content})
	        elif user_msg:
	            messages.append({'role': 'user', 'content': user_msg})

	        if assistant_msg:
	            messages.append({'role': 'assistant', 'content': assistant_msg})
	    return messages

	def messages_to_history(messages: Messages) -> History:
	    """Convert a list of messages back to Gradio's history format.

	    Messages are paired by their ``role`` rather than by position, so the
	    result stays correct when a turn has no user message (for example a
	    history entry whose user side was empty and therefore never became a
	    message).

	    Args:
	        messages: Dicts with a ``'content'`` key and, normally, a ``'role'``
	            key. Messages whose role is ``'system'`` are skipped; a message
	            whose role is not ``'assistant'`` is treated as a user message.

	    Returns:
	        A list of ``(user_msg, assistant_msg)`` tuples. A user message with
	        no following assistant reply is paired with ``""``; an assistant
	        message with no preceding user message is paired with ``None``.
	    """
	    history: History = []
	    pending_user = None
	    has_pending_user = False

	    for message in messages:
	        role = message.get('role')
	        if role == 'system':
	            continue

	        content = message['content']
	        if role == 'assistant':
	            # Close the open turn, or emit an assistant-only turn.
	            history.append((pending_user if has_pending_user else None, content))
	            pending_user = None
	            has_pending_user = False
	        else:
	            if has_pending_user:
	                # Two user messages in a row: the first one got no reply.
	                history.append((pending_user, ""))
	            pending_user = content
	            has_pending_user = True

	    if has_pending_user:
	        # Trailing user message that has not been answered yet.
	        history.append((pending_user, ""))
	    return history

	# --- Image Processing ---

	def process_image_for_model(image_data: np.ndarray) -> str:
	    """Encode a NumPy image array as a base64 PNG data URI.

	    Args:
	        image_data: Image array; it must be convertible by
	            ``PIL.Image.fromarray``.

	    Returns:
	        A string of the form ``data:image/png;base64,<payload>`` where the
	        payload is the PNG-encoded image.
	    """
	    pil_img = Image.fromarray(image_data)
	    buffer = io.BytesIO()
	    # Serialise in memory; no temporary file is written.
	    pil_img.save(buffer, format="PNG")
	    img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
	    return f"data:image/png;base64,{img_str}"

	# --- Code Manipulation ---

	def remove_code_block(text: str) -> str:
	    """Extract the code from the first markdown-style fenced code block.

	    The opening fence may carry any info string (``python``, ``c++``,
	    ``python3``, a tag followed by spaces, or nothing at all). The closing
	    fence must start on its own line.

	    Args:
	        text: Text that may contain a fenced code block.

	    Returns:
	        The stripped contents of the first fenced block, or the stripped
	        input text when no fenced block is found.
	    """
	    # `[^\n`]*` accepts the whole info string after the opening fence, not
	    # just purely alphabetic language names.
	    pattern = r'```[^\n`]*\n(.*?)\n```'
	    match = re.search(pattern, text, re.DOTALL)
	    if match:
	        return match.group(1).strip()
	    return text.strip()

	def apply_search_replace(original_code: str, change_block: str) -> str:
	    """Apply a single search-and-replace block to the code.

	    The block is split on a line consisting solely of ``DIVIDER``. The part
	    before it (minus ``SEARCH_START``) is the text to find; the part after
	    it (minus ``REPLACE_END``) is the replacement. Both parts are stripped
	    of surrounding whitespace.

	    Args:
	        original_code: The code to modify.
	        change_block: One search/replace block.

	    Returns:
	        The modified code. ``original_code`` is returned unchanged when the
	        block does not contain exactly one divider line, when the search
	        text is not found, or when an unexpected error occurs. An empty
	        search text prepends the replacement to the code. Every occurrence
	        of the search text is replaced.
	    """
	    try:
	        # Escape the divider so it is matched literally even if it ever
	        # contains regex metacharacters.
	        divider_pattern = f"^{re.escape(DIVIDER)}$"
	        parts = re.split(divider_pattern, change_block, flags=re.MULTILINE)
	        if len(parts) != 2:
	            return original_code  # Invalid block

	        search_part, replace_part = parts
	        search_content = search_part.replace(SEARCH_START, "").strip()
	        replace_content = replace_part.replace(REPLACE_END, "").strip()

	        if not search_content:
	            # Empty search block: insert at the beginning.
	            return replace_content + "\n" + original_code

	        if search_content in original_code:
	            return original_code.replace(search_content, replace_content)

	        # The search text is absent. Fuzzy matching would be needed to
	        # recover reliably, so warn and leave the code untouched.
	        print(f"Warning: Search block not found:\n---\n{search_content}\n---")
	        return original_code

	    except Exception as e:
	        # Best effort: never let a malformed block break the caller.
	        print(f"Error applying changes: {e}")
	        return original_code

	def get_gradio_language(language: str) -> Optional[str]:
	    """Return the language name if supported by Gradio, otherwise None.

	    Args:
	        language: Language identifier to look up.

	    Returns:
	        ``language`` unchanged when it is a member of
	        ``GRADIO_SUPPORTED_LANGUAGES``; otherwise ``None``.
	    """
	    return language if language in GRADIO_SUPPORTED_LANGUAGES else None