Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -94,10 +94,10 @@ def custom_split_text(text: str) -> list:
|
|
94 |
candidate_end = len(words)
|
95 |
chunk_words = words[start:candidate_end]
|
96 |
split_index = None
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
if split_index is not None:
|
102 |
candidate_end = start + split_index + 1
|
103 |
chunk_words = words[start:candidate_end]
|
@@ -194,16 +194,18 @@ def tts_streaming(text: str, voice: str = "af_heart", speed: float = 1.0, format
|
|
194 |
chunk_tokens = tokenizer(chunk)
|
195 |
|
196 |
# For the first chunk, prepend 0; for later chunks, start with the previous chunk's last token.
|
197 |
-
if i == 0:
|
198 |
-
|
199 |
-
else:
|
200 |
-
tokens_to_send = [0] +
|
201 |
# token_to_send = [0] + chunk_tokens
|
202 |
|
203 |
# Save the last token of this chunk for the next iteration.
|
204 |
prev_last_token = chunk_tokens[-1:]
|
205 |
|
206 |
# Prepare the model input (a batch of one sequence).
|
|
|
|
|
207 |
final_token = [tokens_to_send]
|
208 |
print(final_token)
|
209 |
|
@@ -232,6 +234,7 @@ def tts_streaming(text: str, voice: str = "af_heart", speed: float = 1.0, format
|
|
232 |
|
233 |
# Convert the model output (assumed to be float32 in [-1, 1]) to int16 PCM.
|
234 |
audio_int16 = (audio_output * 32767).astype(np.int16).flatten()
|
|
|
235 |
|
236 |
# Convert to a torch tensor (back into float range) for our helper functions.
|
237 |
# audio_tensor = torch.from_numpy(audio_int16.astype(np.float32) / 32767)
|
|
|
94 |
candidate_end = len(words)
|
95 |
chunk_words = words[start:candidate_end]
|
96 |
split_index = None
|
97 |
+
for i in range(len(chunk_words) - 1):
|
98 |
+
if '.' in chunk_words[i]:
|
99 |
+
split_index = i
|
100 |
+
break
|
101 |
if split_index is not None:
|
102 |
candidate_end = start + split_index + 1
|
103 |
chunk_words = words[start:candidate_end]
|
|
|
194 |
chunk_tokens = tokenizer(chunk)
|
195 |
|
196 |
# For the first chunk, prepend 0; for later chunks, start with the previous chunk's last token.
|
197 |
+
# if i == 0:
|
198 |
+
# tokens_to_send = [0] + chunk_tokens + [0]
|
199 |
+
# else:
|
200 |
+
# tokens_to_send = [0] + chunk_tokens + [0]
|
201 |
# token_to_send = [0] + chunk_tokens
|
202 |
|
203 |
# Save the last token of this chunk for the next iteration.
|
204 |
prev_last_token = chunk_tokens[-1:]
|
205 |
|
206 |
# Prepare the model input (a batch of one sequence).
|
207 |
+
tokens_to_send = [0] + chunk_tokens + [0]
|
208 |
+
|
209 |
final_token = [tokens_to_send]
|
210 |
print(final_token)
|
211 |
|
|
|
234 |
|
235 |
# Convert the model output (assumed to be float32 in [-1, 1]) to int16 PCM.
|
236 |
audio_int16 = (audio_output * 32767).astype(np.int16).flatten()
|
237 |
+
print(audio_int16)
|
238 |
|
239 |
# Convert to a torch tensor (back into float range) for our helper functions.
|
240 |
# audio_tensor = torch.from_numpy(audio_int16.astype(np.float32) / 32767)
|