bcci committed
Commit 565857a · verified · 1 Parent(s): d5853ad

Update app.py

Files changed (1)
  app.py  +11 -8
app.py CHANGED
@@ -94,10 +94,10 @@ def custom_split_text(text: str) -> list:
         candidate_end = len(words)
         chunk_words = words[start:candidate_end]
         split_index = None
-        # for i in range(len(chunk_words) - 1):
-        #     if '.' in chunk_words[i]:
-        #         split_index = i
-        #         break
+        for i in range(len(chunk_words) - 1):
+            if '.' in chunk_words[i]:
+                split_index = i
+                break
         if split_index is not None:
             candidate_end = start + split_index + 1
             chunk_words = words[start:candidate_end]
@@ -194,16 +194,18 @@ def tts_streaming(text: str, voice: str = "af_heart", speed: float = 1.0, format
         chunk_tokens = tokenizer(chunk)
 
         # For the first chunk, prepend 0; for later chunks, start with the previous chunk's last token.
-        if i == 0:
-            tokens_to_send = [0] + chunk_tokens + [0]
-        else:
-            tokens_to_send = [0] + prev_last_token + [16] + chunk_tokens + [0]
+        # if i == 0:
+        #     tokens_to_send = [0] + chunk_tokens + [0]
+        # else:
+        #     tokens_to_send = [0] + chunk_tokens + [0]
         # token_to_send = [0] + chunk_tokens
 
         # Save the last token of this chunk for the next iteration.
         prev_last_token = chunk_tokens[-1:]
 
         # Prepare the model input (a batch of one sequence).
+        tokens_to_send = [0] + chunk_tokens + [0]
+
         final_token = [tokens_to_send]
         print(final_token)
 
@@ -232,6 +234,7 @@ def tts_streaming(text: str, voice: str = "af_heart", speed: float = 1.0, format
 
         # Convert the model output (assumed to be float32 in [-1, 1]) to int16 PCM.
         audio_int16 = (audio_output * 32767).astype(np.int16).flatten()
+        print(audio_int16)
 
         # Convert to a torch tensor (back into float range) for our helper functions.
         # audio_tensor = torch.from_numpy(audio_int16.astype(np.float32) / 32767)
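The first hunk enables the previously commented-out sentence-boundary check in custom_split_text: a candidate chunk is now cut at the first word that contains a period. A minimal standalone sketch of that behavior (the chunk_size value and the driving while loop are assumptions for illustration, not taken from app.py):

def split_on_periods(text: str, chunk_size: int = 6) -> list:
    # Walk the word list in windows of chunk_size and cut each window early
    # at the first word containing '.', mirroring the logic enabled above.
    # chunk_size and this loop structure are assumptions for illustration.
    words = text.split()
    chunks = []
    start = 0
    while start < len(words):
        candidate_end = min(start + chunk_size, len(words))
        chunk_words = words[start:candidate_end]
        split_index = None
        for i in range(len(chunk_words) - 1):
            if '.' in chunk_words[i]:
                split_index = i
                break
        if split_index is not None:
            candidate_end = start + split_index + 1
            chunk_words = words[start:candidate_end]
        chunks.append(" ".join(chunk_words))
        start = candidate_end
    return chunks

print(split_on_periods("Hello there. This is a longer second sentence for testing."))
# ['Hello there.', 'This is a longer second sentence', 'for testing.']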
 
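The second hunk drops the prev_last_token carry-over when building the model input: every chunk is now framed uniformly as [0] + chunk_tokens + [0] (prev_last_token is still computed but no longer used in the frame). A small sketch of the resulting framing, with made-up token ids standing in for tokenizer(chunk) output:

def frame_chunks(token_chunks: list) -> list:
    # After this commit every chunk gets the same frame: a 0 on each side.
    framed = []
    for chunk_tokens in token_chunks:
        tokens_to_send = [0] + chunk_tokens + [0]
        framed.append([tokens_to_send])  # batch of one sequence, as in app.py
    return framed

# Token ids below are made up for illustration; app.py gets them from tokenizer(chunk).
for batch in frame_chunks([[12, 7, 31], [44, 5]]):
    print(batch)
# [[0, 12, 7, 31, 0]]
# [[0, 44, 5, 0]]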
 
 
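The last hunk only adds a print of the int16 buffer. For reference, the float-to-PCM conversion being printed works as follows (the sample values here are made up):

import numpy as np

# Model output is assumed to be float32 in [-1, 1]; scaling by 32767 and
# casting to int16 yields 16-bit PCM samples. Values below are made up.
audio_output = np.array([[0.0, 0.5, -1.0, 1.0]], dtype=np.float32)
audio_int16 = (audio_output * 32767).astype(np.int16).flatten()
print(audio_int16)
# expected samples: 0, 16383, -32767, 32767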