mgbam committed on
Commit
7833311
·
verified ·
1 Parent(s): 1ae58ff

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +103 -0
utils.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /utils.py
2
+
3
+ """
4
+ A collection of utility functions for data manipulation and formatting.
5
+
6
+ This module provides helpers for tasks like converting chat history formats,
7
+ processing images for multimodal models, cleaning model outputs, and
8
+ applying code modifications.
9
+ """
10
import base64
import io
import re
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from PIL import Image

from config import SEARCH_START, DIVIDER, REPLACE_END, GRADIO_SUPPORTED_LANGUAGES
19
+
20
+ # --- Type Definitions ---
21
# Chat history in Gradio's format: a list of (user message, assistant message)
# pairs, where either side of a pair may be None.
History = List[Tuple[Optional[str], Optional[str]]]
# API-style message list. The helpers in this module read and write the
# 'role' and 'content' keys. The value type is typing.Any (the builtin
# function `any` is not a type).
Messages = List[Dict[str, Any]]
23
+
24
+ # --- History and Message Conversion ---
25
+
26
def history_to_messages(history: History, system_prompt: str) -> Messages:
    """Build an API-style message list from Gradio chat history.

    The result always starts with a system message carrying ``system_prompt``.
    Each ``(user, assistant)`` pair then contributes up to two entries:

    * a user entry, when the user side is a non-empty value, or a list
      (multimodal message), in which case only the first text part is kept;
    * an assistant entry, when the assistant side is truthy.
    """
    converted = [{'role': 'system', 'content': system_prompt}]

    for user_turn, assistant_turn in history:
        if isinstance(user_turn, list):
            # Multimodal turn: keep only the first part tagged as text.
            # An empty string is used when no such part exists.
            user_text = ""
            for part in user_turn:
                if isinstance(part, dict) and part.get("type") == "text":
                    user_text = part.get("text", "")
                    break
            converted.append({'role': 'user', 'content': user_text})
        elif user_turn:
            converted.append({'role': 'user', 'content': user_turn})

        if assistant_turn:
            converted.append({'role': 'assistant', 'content': assistant_turn})

    return converted
41
+
42
def messages_to_history(messages: Messages) -> History:
    """Convert a list of role-tagged messages back to Gradio's history format.

    Messages are paired by their ``'role'`` rather than by position, so the
    result stays aligned even when a turn is missing:

    * ``'system'`` messages (and any role other than user/assistant) are
      skipped wherever they appear;
    * a ``'user'`` message followed by an ``'assistant'`` message becomes one
      ``(user, assistant)`` pair;
    * a ``'user'`` message with no assistant reply becomes ``(user, "")``;
    * an ``'assistant'`` message with no preceding user message becomes
      ``(None, assistant)``.

    Args:
        messages: Dicts carrying a ``'content'`` key and a ``'role'`` key.

    Returns:
        A list of ``(user_message, assistant_message)`` tuples.
    """
    history = []
    pending_user = None      # user content still waiting for its reply
    has_pending_user = False  # separate flag: the content itself may be falsy

    for message in messages:
        role = message.get('role')
        if role == 'user':
            if has_pending_user:
                # Two user messages in a row: close the earlier one unanswered.
                history.append((pending_user, ""))
            pending_user = message['content']
            has_pending_user = True
        elif role == 'assistant':
            user_side = pending_user if has_pending_user else None
            history.append((user_side, message['content']))
            pending_user = None
            has_pending_user = False
        # Any other role (e.g. 'system') has no place in the history pairs.

    if has_pending_user:
        # Trailing user message without an assistant reply yet.
        history.append((pending_user, ""))
    return history
51
+
52
+ # --- Image Processing ---
53
+
54
def process_image_for_model(image_data: np.ndarray) -> str:
    """Encode a NumPy image array as a PNG ``data:`` URI.

    The array is turned into a PIL image, serialised to PNG in memory, and the
    PNG bytes are base64-encoded. The returned string has the form
    ``data:image/png;base64,<payload>``.
    """
    with io.BytesIO() as png_stream:
        Image.fromarray(image_data).save(png_stream, format="PNG")
        png_bytes = png_stream.getvalue()

    payload = base64.b64encode(png_bytes).decode("utf-8")
    return "data:image/png;base64," + payload
61
+
62
+ # --- Code Manipulation ---
63
+
64
def remove_code_block(text: str) -> str:
    """Extract the code from the first markdown-style fenced code block.

    The opening fence may carry a language tag made of word characters and
    ``+ # . -`` (e.g. ``python``, ``python3``, ``c++``, ``objective-c``),
    optionally followed by spaces or tabs, and may end in ``\\n`` or
    ``\\r\\n``. The closing fence must start right after a newline.

    Args:
        text: Raw text that may contain a fenced code block.

    Returns:
        The stripped contents of the first fenced block, or the stripped
        input text when no fenced block is found.
    """
    # The tag must start immediately after the backticks so that prose such as
    # "``` some words" is not mistaken for an opening fence.
    pattern = r'```[\w+#.-]*[ \t]*\r?\n(.*?)\n```'
    match = re.search(pattern, text, re.DOTALL)
    if match:
        # strip() also removes the '\r' left before the closing fence in CRLF text.
        return match.group(1).strip()
    return text.strip()
71
+
72
def apply_search_replace(original_code: str, change_block: str) -> str:
    """Apply a single search-and-replace block to ``original_code``.

    ``change_block`` is split on a line consisting solely of ``DIVIDER``. The
    text before it is the search section and the text after it is the
    replacement section. ``SEARCH_START`` / ``REPLACE_END`` markers are removed
    and both sections are whitespace-stripped before use.

    Args:
        original_code: The code to modify.
        change_block: One search/replace block.

    Returns:
        * ``original_code`` unchanged when the block does not split into
          exactly two parts, when the search text is not present, or when an
          unexpected error occurs (a message is printed in the latter two
          cases);
        * the replacement followed by a newline and ``original_code`` when the
          search section is empty;
        * otherwise ``original_code`` with every occurrence of the search text
          replaced.
    """
    try:
        # Escape the divider so it is matched literally even if it contains
        # regex metacharacters.
        divider_line = f"^{re.escape(DIVIDER)}$"
        parts = re.split(divider_line, change_block, flags=re.MULTILINE)
        if len(parts) != 2:
            return original_code  # Invalid block

        search_part, replace_part = parts
        search_content = search_part.replace(SEARCH_START, "").strip()
        replace_content = replace_part.replace(REPLACE_END, "").strip()

        if not search_content:
            # An empty search section means "insert at the beginning".
            return replace_content + "\n" + original_code

        if search_content in original_code:
            return original_code.replace(search_content, replace_content)

        # The search text is absent (for instance an insertion expressed as
        # "line before" + new code that does not match exactly). Reliable
        # handling would need more advanced logic, so the code is left as-is.
        print(f"Warning: Search block not found:\n---\n{search_content}\n---")
        return original_code

    except Exception as e:
        # Best-effort: never let a malformed block break the caller.
        print(f"Error applying changes: {e}")
        return original_code
100
+
101
def get_gradio_language(language: str) -> Optional[str]:
    """Return ``language`` if it is in ``GRADIO_SUPPORTED_LANGUAGES``, else ``None``."""
    if language in GRADIO_SUPPORTED_LANGUAGES:
        return language
    return None