ajsbsd committed on
Commit
776566f
1 Parent(s): aca2abc
Files changed (3)
  1. app_local_semi.py +260 -0
  2. festival_app.py +65 -0
  3. festival_test.py +30 -0
app_local_semi.py ADDED
@@ -0,0 +1,260 @@
+ import gradio as gr
+ import torch
+ import os
+ import time
+ import subprocess
+ import tempfile
+
+ # --- Try to import ctransformers for GGUF, provide helpful message if not found ---
+ try:
+     from ctransformers import AutoModelForCausalLM as AutoModelForCausalLM_GGUF
+     from ctransformers.llm import LLM
+     from transformers import AutoTokenizer, AutoModelForCausalLM
+     GGUF_AVAILABLE = True
+ except ImportError:
+     GGUF_AVAILABLE = False
+     print("WARNING: 'ctransformers' not found. This app relies on it for efficient CPU inference.")
+     print("Please install it with: pip install ctransformers transformers")
+     from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # --- Configuration for Models and Generation ---
+ ORIGINAL_MODEL_ID = "HuggingFaceTB/SmolLM2-360M-Instruct"
+ GGUF_MODEL_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
+ GGUF_MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+
+ # --- Generation Parameters ---
+ MAX_NEW_TOKENS = 256
+ TEMPERATURE = 0.7
+ TOP_K = 50
+ TOP_P = 0.95
+ DO_SAMPLE = True  # This parameter is only used by the Hugging Face transformers generate() path
+
+ # Global model and tokenizer
+ model = None
+ tokenizer = None
+ device = "cpu"
+
+ # --- Festival Audio Function ---
+ def speak_text_festival_to_file(text):
+     """
+     Uses Festival to speak the given text and saves the output to a temporary WAV file.
+     Returns the path to the generated audio file, or None on error.
+     """
+     if not text.strip():
+         print("No text provided for Festival to speak.")
+         return None
+
+     # Create a temporary WAV file for Festival output
+     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
+         audio_filepath = temp_audio_file.name
+
+     try:
+         # Festival (Scheme) command to synthesize the text and save it to a WAV file.
+         # Quotes are escaped outside the f-string: a backslash inside an f-string
+         # expression is a syntax error before Python 3.12.
+         escaped_text = text.replace('"', '\\"')
+         festival_command = f"""
+         (set! utt (SayText "{escaped_text}"))
+         (utt.save.wave utt "{audio_filepath}")
+         """
+
+         # Execute Festival via subprocess, feeding the command to its stdin
+         process = subprocess.Popen(['festival', '--pipe'],
+                                    stdin=subprocess.PIPE,
+                                    stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE,
+                                    text=True)
+         stdout, stderr = process.communicate(input=festival_command)
+
+         if process.returncode != 0:
+             print(f"Error speaking text with Festival. Return code: {process.returncode}")
+             print(f"Festival stderr: {stderr}")
+             if os.path.exists(audio_filepath):
+                 os.remove(audio_filepath)
+             return None
+
+         if not os.path.exists(audio_filepath) or os.path.getsize(audio_filepath) == 0:
+             print(f"Festival did not create a valid WAV file at {audio_filepath}. Stderr: {stderr}")
+             if os.path.exists(audio_filepath):
+                 os.remove(audio_filepath)
+             return None
+
+         print(f"Audio saved to: {audio_filepath}")
+         return audio_filepath
+
+     except FileNotFoundError:
+         print("Error: Festival executable not found. Make sure Festival is installed and in your PATH.")
+         if os.path.exists(audio_filepath):
+             os.remove(audio_filepath)
+         return None
+     except Exception as e:
+         print(f"An unexpected error occurred during Festival processing: {e}")
+         if os.path.exists(audio_filepath):
+             os.remove(audio_filepath)
+         return None
+
+ # --- Model Loading Function ---
+ def load_model_for_zerocpu():
+     global model, tokenizer, device
+
+     if GGUF_AVAILABLE:
+         print(f"Attempting to load GGUF model '{GGUF_MODEL_ID}' (file: '{GGUF_MODEL_FILENAME}') for ZeroCPU...")
+         try:
+             model = AutoModelForCausalLM_GGUF.from_pretrained(
+                 GGUF_MODEL_ID,
+                 model_file=GGUF_MODEL_FILENAME,
+                 model_type="llama",
+                 gpu_layers=0
+             )
+             # NOTE: the tokenizer (and its chat template) comes from ORIGINAL_MODEL_ID,
+             # not from the GGUF repo, so prompts are formatted with SmolLM2's template.
+             tokenizer = AutoTokenizer.from_pretrained(ORIGINAL_MODEL_ID)
+             if tokenizer.pad_token is None:
+                 tokenizer.pad_token = tokenizer.eos_token
+             print(f"GGUF model '{GGUF_MODEL_ID}' loaded successfully for CPU.")
+             return
+         except Exception as e:
+             print(f"WARNING: Could not load GGUF model '{GGUF_MODEL_ID}' from '{GGUF_MODEL_FILENAME}': {e}")
+             print(f"Falling back to standard Hugging Face model '{ORIGINAL_MODEL_ID}' for CPU (will be slower without GGUF quantization).")
+     else:
+         print("WARNING: ctransformers is not available. Will load standard Hugging Face model directly.")
+
+     print(f"Loading standard Hugging Face model '{ORIGINAL_MODEL_ID}' for CPU...")
+     try:
+         model = AutoModelForCausalLM.from_pretrained(ORIGINAL_MODEL_ID)
+         tokenizer = AutoTokenizer.from_pretrained(ORIGINAL_MODEL_ID)
+         if tokenizer.pad_token is None:
+             tokenizer.pad_token = tokenizer.eos_token
+         model.to(device)
+         print(f"Standard model '{ORIGINAL_MODEL_ID}' loaded successfully on CPU.")
+     except Exception as e:
+         print(f"CRITICAL ERROR: Could not load standard model '{ORIGINAL_MODEL_ID}' on CPU: {e}")
+         print("Please ensure the model ID is correct, you have enough RAM, and dependencies are installed.")
+         model = None
+         tokenizer = None
+
+ # --- Inference Function for Gradio Blocks ---
+ # This function yields tuples for streaming text and then the final audio.
+ def predict_chat_with_audio_and_streaming(message: str, history: list):
+     if model is None or tokenizer is None:
+         # history is a list of message dictionaries, so yield in the same format
+         yield history + [{"role": "user", "content": message}, {"role": "assistant", "content": "Error: Model or tokenizer failed to load."}], None
+         return
+
+     # Initialize llm_messages with a system message
+     llm_messages = [{"role": "system", "content": "You are a friendly chatbot."}]
+
+     # The history from Gradio's Chatbot (type='messages') is already a list of
+     # {"role": ..., "content": ...} dictionaries, i.e. the LLM message format
+     for item in history:
+         llm_messages.append(item)
+
+     # Add the current user message
+     llm_messages.append({"role": "user", "content": message})
+
+     generated_text = ""
+     start_time = time.time()
+
+     if GGUF_AVAILABLE and isinstance(model, LLM):
+         prompt_input = tokenizer.apply_chat_template(llm_messages, tokenize=False, add_generation_prompt=True)
+         for token in model(
+             prompt_input,
+             max_new_tokens=MAX_NEW_TOKENS,
+             temperature=TEMPERATURE,
+             top_k=TOP_K,
+             top_p=TOP_P,
+             repetition_penalty=1.1,
+             stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>", "<|im_end|>"],
+             stream=True
+         ):
+             generated_text += token
+             # Strip common special tokens before yielding
+             cleaned_text = generated_text.replace("<|im_end|>", "").replace("<|endoftext|>", "").strip()
+             # Yield the current chat history (list of dictionaries) and an empty audio output while streaming
+             yield history + [{"role": "user", "content": message}, {"role": "assistant", "content": cleaned_text}], None
+         # Clean the accumulated text as well, so the final yield and the TTS call
+         # below don't receive leftover special tokens
+         generated_text = generated_text.replace("<|im_end|>", "").replace("<|endoftext|>", "").strip()
+     else:
+         input_text = tokenizer.apply_chat_template(llm_messages, tokenize=False, add_generation_prompt=True)
+         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+         outputs = model.generate(
+             inputs,
+             max_length=inputs.shape[-1] + MAX_NEW_TOKENS,
+             temperature=TEMPERATURE,
+             top_k=TOP_K,
+             top_p=TOP_P,
+             do_sample=DO_SAMPLE,
+             pad_token_id=tokenizer.pad_token_id
+         )
+         generated_text = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True).strip()
+         # Strip common special tokens from the final generated text
+         generated_text = generated_text.replace("<|im_end|>", "").replace("<|endoftext|>", "").strip()
+         # Yield the full text response before audio generation
+         yield history + [{"role": "user", "content": message}, {"role": "assistant", "content": generated_text}], None
+
+     end_time = time.time()
+     print(f"Inference Time for this turn: {end_time - start_time:.2f} seconds")
+
+     # After streaming is complete and the full text is gathered, synthesize audio
+     audio_file_path = speak_text_festival_to_file(generated_text)
+
+     # Yield the final state with the audio file
+     yield history + [{"role": "user", "content": message}, {"role": "assistant", "content": generated_text}], audio_file_path
+
+
+ # --- Gradio Interface Setup ---
+ if __name__ == "__main__":
+     load_model_for_zerocpu()
+
+     # chatbot_initial_value is already in the correct format for type='messages'
+     chatbot_initial_value = [{"role": "assistant", "content": "Hello! I'm an AI assistant. I'm currently running in a CPU-only environment for efficient demonstration. How can I help you today?"}]
+
+     # Gradio Blocks for a more flexible layout
+     with gr.Blocks(theme="soft", title="SmolLM2-360M-Instruct (or TinyLlama GGUF) on ZeroCPU with Festival TTS") as demo:
+         gr.Markdown(
+             """
+             # SmolLM2-360M-Instruct (or TinyLlama GGUF) on ZeroCPU with Festival TTS
+             This Space demonstrates an LLM for efficient CPU-only inference.
+             **Note:** On ZeroCPU, this app prefers the GGUF-quantized TinyLlama model
+             (`tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf`) because it runs faster on CPU than the
+             unquantized `HuggingFaceTB/SmolLM2-360M-Instruct`. Expect varied responses on each
+             run due to sampling-based generation.
+             **Festival TTS:** The chatbot's responses are also spoken aloud using the local Festival Speech Synthesis System.
+             """
+         )
+
+         # The main Chatbot display component
+         chatbot_display = gr.Chatbot(value=chatbot_initial_value, height=500, label="Chat History", type='messages')
+
+         # Audio component for the last response
+         audio_output = gr.Audio(label="Chatbot Audio Response", type="filepath", autoplay=True)
+
+         # Textbox for user input
+         msg = gr.Textbox(placeholder="Ask me a question...", container=False, scale=7)
+
+         # Submit button
+         submit_btn = gr.Button("Send")
+
+         # Example inputs for the textbox. With type='messages', each inner list is a
+         # user message for the input textbox; the output is still the chat history.
+         examples_data = [
+             ["What is the capital of France?"],
+             ["Can you tell me a fun fact about outer space?"],
+             ["What's the best way to stay motivated?"],
+         ]
+
+         # Gradio Examples
+         gr.Examples(
+             examples=examples_data,
+             inputs=[msg],
+             fn=predict_chat_with_audio_and_streaming,
+             outputs=[chatbot_display, audio_output],
+             cache_examples=False,
+         )
+
+         # Event listeners for submission
+         msg.submit(predict_chat_with_audio_and_streaming,
+                    inputs=[msg, chatbot_display],
+                    outputs=[chatbot_display, audio_output])
+         submit_btn.click(predict_chat_with_audio_and_streaming,
+                          inputs=[msg, chatbot_display],
+                          outputs=[chatbot_display, audio_output])
+
+         # Clear the textbox after submission for better UX
+         msg.submit(lambda: "", outputs=[msg])
+         submit_btn.click(lambda: "", outputs=[msg])
+
+     demo.launch()
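A quick way to sanity-check the Festival path before launching the full app is to call the synthesis helper directly. A minimal sketch, assuming the file above is saved as `app_local_semi.py`, its dependencies are installed, and the `festival` binary is on PATH (the test phrase is illustrative):

```python
# Smoke test for the Festival helper from app_local_semi.py.
import os
import wave

from app_local_semi import speak_text_festival_to_file

path = speak_text_festival_to_file("Testing Festival synthesis.")
if path is not None:
    # Inspect the WAV header to confirm real audio was produced
    with wave.open(path, "rb") as wav:
        frames = wav.getnframes()
        rate = wav.getframerate()
        print(f"Synthesized {frames / rate:.2f}s of audio at {rate} Hz -> {path}")
    os.remove(path)  # the helper leaves the temp file for the caller to clean up
else:
    print("Festival synthesis failed; check that Festival is installed and in PATH.")
```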
festival_app.py ADDED
@@ -0,0 +1,65 @@
+ import gradio as gr
+ import subprocess
+ import os
+ import tempfile
+
+ def speak_text_via_festival(text):
+     """
+     Uses Festival to speak the given text and returns the path to the generated audio file.
+     """
+     if not text:
+         return None
+
+     # Create a temporary WAV file for Festival output.
+     # tempfile ensures unique and safely managed temporary files.
+     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
+         audio_filepath = temp_audio_file.name
+
+     try:
+         # Festival (Scheme) command: (SayText ...) synthesizes the utterance and
+         # (utt.save.wave utt "filename.wav") saves it to a WAV file instead of
+         # playing it back directly. Double quotes in the text are escaped first
+         # so they can't break the Scheme string.
+         escaped_text = text.replace('"', '\\"')
+         festival_command = f"""
+         (set! utt (SayText "{escaped_text}"))
+         (utt.save.wave utt "{audio_filepath}")
+         """
+
+         process = subprocess.Popen(['festival', '--pipe'],
+                                    stdin=subprocess.PIPE,
+                                    stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE,
+                                    text=True)
+         stdout, stderr = process.communicate(input=festival_command)
+
+         if process.returncode != 0:
+             print(f"Error speaking text with Festival: {stderr}")
+             if os.path.exists(audio_filepath):
+                 os.remove(audio_filepath)  # Clean up the partial file
+             return None
+
+         # Gradio's gr.Audio component expects a path to the audio file
+         return audio_filepath
+
+     except FileNotFoundError:
+         print("Error: Festival executable not found. Make sure Festival is installed and in your PATH.")
+         if os.path.exists(audio_filepath):
+             os.remove(audio_filepath)
+         return None
+     except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+         if os.path.exists(audio_filepath):
+             os.remove(audio_filepath)
+         return None
+
+ # Define the Gradio Interface
+ iface = gr.Interface(
+     fn=speak_text_via_festival,
+     inputs=gr.Textbox(lines=2, label="Enter text for Festival TTS:"),
+     outputs=gr.Audio(label="Generated Audio", type="filepath", autoplay=True),
+     title="Festival TTS with Gradio",
+     description="Enter text to synthesize speech using the local Festival system."
+ )
+
+ # Launch the Gradio app
+ if __name__ == "__main__":
+     iface.launch()
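The same helper works outside Gradio as well. A small sketch of batch use, assuming this file is saved as `festival_app.py`; the phrases and output filenames below are illustrative:

```python
# Batch-synthesize a few phrases with the helper from festival_app.py.
# Assumes Festival is installed; the helper returns tempfile paths, which we
# move to stable names for inspection.
import shutil

from festival_app import speak_text_via_festival

phrases = ["Hello from Festival.", "Speech synthesis on the CPU."]
for i, phrase in enumerate(phrases):
    tmp_path = speak_text_via_festival(phrase)
    if tmp_path is not None:
        shutil.move(tmp_path, f"phrase_{i}.wav")
        print(f"Wrote phrase_{i}.wav")
```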
festival_test.py ADDED
@@ -0,0 +1,30 @@
+ import subprocess
+
+ def speak_text_festival(text):
+     """
+     Uses Festival to speak the given text.
+     """
+     escaped = text.replace('"', '\\"')  # escape double quotes for the Scheme string
+     command = f'(SayText "{escaped}")'
+     try:
+         # Popen runs Festival; we pass the command to its standard input.
+         process = subprocess.Popen(['festival', '--pipe'],
+                                    stdin=subprocess.PIPE,
+                                    stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE,
+                                    text=True)  # text=True for string input/output
+         stdout, stderr = process.communicate(input=command)
+
+         if process.returncode != 0:
+             print(f"Error speaking text with Festival: {stderr}")
+         # else:
+         #     print(f"Festival output: {stdout}")  # Uncomment to see Festival's stdout
+
+     except FileNotFoundError:
+         print("Error: Festival executable not found. Make sure Festival is installed and in your PATH.")
+     except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+
+ # Example usage:
+ speak_text_festival("Good morning, welcome to Festival.")
+ speak_text_festival("This is an example of Python interacting with Festival.")