Spaces: Running on A100
idan shenfeld committed
Commit a5bbbdc · Parent(s): e642d6d

code cleanup
app/app.py CHANGED (+60 -62)
@@ -32,15 +32,16 @@ TEXT_ONLY = (
 
 def create_inference_client(
     model: Optional[str] = None, base_url: Optional[str] = None
-) -> InferenceClient:
+) -> InferenceClient | dict:
     """Create an InferenceClient instance with the given model or environment settings.
     This function will run the model locally if ZERO_GPU is set to True.
 
     Args:
         model: Optional model identifier to use. If not provided, will use environment settings.
+        base_url: Optional base URL for the inference API.
 
     Returns:
-        InferenceClient
+        Either an InferenceClient instance or a dictionary with pipeline and tokenizer
     """
     if ZERO_GPU:
         tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
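The widened return type reflects the ZeroGPU path, where the function returns a plain dict instead of an InferenceClient. A minimal sketch of what that dict plausibly holds, inferred from the CLIENT["tokenizer"] lookup later in this diff — the "pipeline" key, the env-var wiring, and the client fallback are assumptions, not the Space's actual code:

import os
from typing import Optional

from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, pipeline

ZERO_GPU = os.getenv("ZERO_GPU", "false").lower() == "true"  # assumed env wiring
BASE_MODEL = os.environ.get("BASE_MODEL", "")  # assumed; set by the Space's config

def create_inference_client_sketch(
    model: Optional[str] = None, base_url: Optional[str] = None
) -> InferenceClient | dict:
    if ZERO_GPU:
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
        return {
            # "tokenizer" is read later in the diff as CLIENT["tokenizer"];
            # the "pipeline" key is a guess at how the local model is kept.
            "pipeline": pipeline("text-generation", model=BASE_MODEL, tokenizer=tokenizer),
            "tokenizer": tokenizer,
        }
    # model and base_url are mutually exclusive in InferenceClient.
    if base_url:
        return InferenceClient(base_url=base_url)
    return InferenceClient(model=model)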
@@ -65,11 +66,17 @@ def create_inference_client(
 CLIENT = create_inference_client()
 
 
-def load_languages() -> dict[str, str]:
-    """Load languages from JSON file or persistent storage"""
-    persistent_path = Path("/data/languages.json")
-    local_path = Path(__file__).parent / "languages.json"
-
+def get_persistent_storage_path(filename: str) -> tuple[Path, bool]:
+    """Check if persistent storage is available and return the appropriate path.
+
+    Args:
+        filename: The name of the file to check/create
+
+    Returns:
+        A tuple containing (file_path, is_persistent)
+    """
+    persistent_path = Path("/data") / filename
+    local_path = Path(__file__).parent / filename
 
     # Check if persistent storage is available and writable
     use_persistent = False
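The new helper wraps the touch-and-unlink writability probe that previously lived in both load_languages and save_new_language (the probe's unchanged middle sits between these hunks). The same check as a standalone sketch, independent of this app:

from pathlib import Path

def is_writable_dir(directory: Path) -> bool:
    """Probe for write access by creating and removing a throwaway file."""
    if not (directory.exists() and directory.is_dir()):
        return False
    probe = directory / "write_test.tmp"
    try:
        probe.touch()
        probe.unlink()  # clean up the probe file
        return True
    except (PermissionError, OSError):
        return False

# On a Hugging Face Space, /data only exists when persistent storage is enabled.
use_persistent = is_writable_dir(Path("/data"))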
@@ -84,35 +91,44 @@ def load_languages() -> dict[str, str]:
             print("Persistent storage exists but is not writable, falling back to local storage")
             use_persistent = False
 
-    # Use persistent storage if available and writable, otherwise fall back to local file
-    languages_path = persistent_path if use_persistent else local_path
-
-    if not languages_path.exists() and local_path.exists():
+    return (persistent_path if use_persistent else local_path, use_persistent)
+
+
+def load_languages() -> dict[str, str]:
+    """Load languages from JSON file or persistent storage"""
+    languages_path, use_persistent = get_persistent_storage_path("languages.json")
+    local_path = Path(__file__).parent / "languages.json"
+
+    # If persistent storage is available but file doesn't exist yet,
+    # copy the local file to persistent storage
+    if use_persistent and not languages_path.exists():
+        try:
+            if local_path.exists():
+                import shutil
+                # Copy the file to persistent storage
+                shutil.copy(local_path, languages_path)
+                print(f"Copied languages to persistent storage at {languages_path}")
+            else:
+                # Create an empty languages file in persistent storage
+                with open(languages_path, "w", encoding="utf-8") as f:
+                    json.dump({"English": "You are a helpful assistant."}, f, ensure_ascii=False, indent=2)
+                print(f"Created new languages file in persistent storage at {languages_path}")
+        except Exception as e:
+            print(f"Error setting up persistent storage: {e}")
+            languages_path = local_path  # Fall back to local path if any error occurs
+
+    # If the file doesn't exist at the chosen path but exists at the local path, use local
+    if not languages_path.exists() and local_path.exists():
         languages_path = local_path
-
-    # If persistent storage is available and writable but file doesn't exist yet,
-    # copy the local file to persistent storage
-    if use_persistent:
-        try:
-            # Ensure local file exists first
-            if local_path.exists():
-                import shutil
-                # Copy the file to persistent storage
-                shutil.copy(local_path, persistent_path)
-                languages_path = persistent_path
-                print(f"Copied languages to persistent storage at {persistent_path}")
-            else:
-                # Create an empty languages file in persistent storage
-                with open(persistent_path, "w", encoding="utf-8") as f:
-                    json.dump({"English": "You are a helpful assistant."}, f, ensure_ascii=False, indent=2)
-                languages_path = persistent_path
-                print(f"Created new languages file in persistent storage at {persistent_path}")
-        except Exception as e:
-            print(f"Error setting up persistent storage: {e}")
-            languages_path = local_path  # Fall back to local path if any error occurs
 
-    with open(languages_path, "r", encoding="utf-8") as f:
-        return json.load(f)
+    # If the file exists, load it
+    if languages_path.exists():
+        with open(languages_path, "r", encoding="utf-8") as f:
+            return json.load(f)
+    else:
+        # Return a default if no file exists
+        default_languages = {"English": "You are a helpful assistant."}
+        return default_languages
 
 
 # Initial load
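With the probe factored out, callers resolve the path once and reuse it. A short usage sketch built from the two functions in this diff (the print is illustrative, not from the app):

languages_path, use_persistent = get_persistent_storage_path("languages.json")
print(f"Using {'persistent' if use_persistent else 'local'} storage at {languages_path}")

LANGUAGES = load_languages()  # e.g. {"English": "You are a helpful assistant."}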
@@ -250,6 +266,7 @@ def add_fake_like_data(
 
 @spaces.GPU
 def call_pipeline(messages: list, language: str):
+    """Call the appropriate model pipeline based on configuration"""
     if ZERO_GPU:
         # Format the messages using the tokenizer's chat template
         tokenizer = CLIENT["tokenizer"]
@@ -267,16 +284,14 @@ def call_pipeline(messages: list, language: str):
         )
 
         # Extract the generated content
-
-        return content
+        return response[0]["generated_text"]
     else:
         response = CLIENT(
             messages,
             clean_up_tokenization_spaces=False,
             max_length=2000,
         )
-
-        return content
+        return response[0]["generated_text"][-1]["content"]
 
 
 def respond(
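The two new return statements depend on the output shape of a transformers text-generation pipeline: a list with one dict per input, whose generated_text field is the full chat (the input messages plus the model's reply appended) when the prompt is a message list. A hedged illustration; the small model name is an assumption chosen only to make the example runnable:

from transformers import pipeline

pipe = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-135M-Instruct")

messages = [{"role": "user", "content": "Say hi in one word."}]
response = pipe(messages, max_length=2000)

# Chat input -> generated_text is the message list with the assistant's
# reply appended, so the text lives in the last message's "content":
print(response[0]["generated_text"][-1]["content"])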
@@ -284,11 +299,12 @@ def respond(
     language: str,
     temperature: Optional[float] = None,
     seed: Optional[int] = None,
-) -> list:
+) -> list:
     """Respond to the user message with a system message
 
     Return the history with the new message"""
     messages = format_history_as_messages(history)
+
     if ZERO_GPU:
         content = call_pipeline(messages, language)
     else:
@@ -300,6 +316,7 @@ def respond(
             temperature=temperature,
         )
         content = response.choices[0].message.content
+
     message = gr.ChatMessage(role="assistant", content=content)
     history.append(message)
     return history
@@ -492,26 +509,10 @@ def save_new_language(lang_name, system_prompt):
     """Save the new language and system prompt to persistent storage if available, otherwise to local file."""
     global LANGUAGES  # Access the global variable
 
-    #
-    persistent_path = Path("/data/languages.json")
+    # Get the appropriate path
+    languages_path, use_persistent = get_persistent_storage_path("languages.json")
     local_path = Path(__file__).parent / "languages.json"
 
-    # Check if persistent storage is available and writable
-    use_persistent = False
-    if Path("/data").exists() and Path("/data").is_dir():
-        try:
-            # Test if we can write to the directory
-            test_file = Path("/data/write_test.tmp")
-            test_file.touch()
-            test_file.unlink()  # Remove the test file
-            use_persistent = True
-        except (PermissionError, OSError):
-            print("Persistent storage exists but is not writable, falling back to local storage")
-            use_persistent = False
-
-    # Use persistent storage if available and writable, otherwise fall back to local file
-    languages_path = persistent_path if use_persistent else local_path
-
     # Load existing languages
     if languages_path.exists():
        with open(languages_path, "r", encoding="utf-8") as f:
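Condensed, the refactored save path from this and the following two hunks reads roughly as below (a sketch assembled from the diff; the try/except around the backup write is trimmed):

import json
from pathlib import Path

def save_new_language_sketch(lang_name: str, system_prompt: str) -> None:
    languages_path, use_persistent = get_persistent_storage_path("languages.json")
    local_path = Path(__file__).parent / "languages.json"

    # Load existing languages, add the new entry, and write it back.
    data = {}
    if languages_path.exists():
        with open(languages_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    data[lang_name] = system_prompt
    with open(languages_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    # Mirror to the bundled file as a backup when writing to /data.
    if use_persistent and local_path != languages_path:
        with open(local_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)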
@@ -527,7 +528,7 @@ def save_new_language(lang_name, system_prompt):
         json.dump(data, f, ensure_ascii=False, indent=2)
 
     # If we're using persistent storage, also update the local file as backup
-    if use_persistent and local_path != persistent_path:
+    if use_persistent and local_path != languages_path:
         try:
             with open(local_path, "w", encoding="utf-8") as f:
                 json.dump(data, f, ensure_ascii=False, indent=2)
@@ -537,11 +538,8 @@ def save_new_language(lang_name, system_prompt):
     # Update the global LANGUAGES variable with the new data
     LANGUAGES.update({lang_name: system_prompt})
 
-    # Update the dropdown choices
-    new_choices = list(LANGUAGES.keys())
-
     # Return a message that will trigger a JavaScript refresh
-    return gr.Group(visible=False), gr.HTML("<script>window.location.reload();</script>"), gr.Dropdown(choices=new_choices)
+    return gr.Group(visible=False), gr.HTML("<script>window.location.reload();</script>"), gr.Dropdown(choices=list(LANGUAGES.keys()))
 
 
 css = """
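The new single-line return updates three components at once: it hides the form, injects a page-reload script, and refreshes the dropdown choices. A minimal Gradio wiring sketch of that pattern (component names and layout are placeholders, not the Space's actual UI):

import gradio as gr

LANGUAGES = {"English": "You are a helpful assistant."}

def save_language_demo(lang_name: str, system_prompt: str):
    LANGUAGES[lang_name] = system_prompt
    # Hide the form, trigger a reload, refresh the dropdown choices.
    return (
        gr.Group(visible=False),
        gr.HTML("<script>window.location.reload();</script>"),
        gr.Dropdown(choices=list(LANGUAGES.keys())),
    )

with gr.Blocks() as demo:
    dropdown = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
    with gr.Group() as form:
        name = gr.Textbox(label="New language")
        prompt = gr.Textbox(label="System prompt")
        save = gr.Button("Save")
    refresh_html = gr.HTML()
    save.click(save_language_demo, inputs=[name, prompt], outputs=[form, refresh_html, dropdown])

demo.launch()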