Fixed long-text generation: build the Vocos input frames from complete_tokens
Browse files
app.py
CHANGED
|
@@ -403,7 +403,7 @@ def infer_long_text(text, preset_prompt, prompt=None, language='auto', accent='n
|
|
| 403 |
)
|
| 404 |
complete_tokens = torch.cat([complete_tokens, encoded_frames.transpose(2, 1)], dim=-1)
|
| 405 |
# Decode with Vocos
|
| 406 |
-
frames =
|
| 407 |
features = vocos.codes_to_features(frames)
|
| 408 |
samples = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
| 409 |
|
|
@@ -451,7 +451,7 @@ def infer_long_text(text, preset_prompt, prompt=None, language='auto', accent='n
|
|
| 451 |
audio_prompts = original_audio_prompts
|
| 452 |
text_prompts = original_text_prompts
|
| 453 |
# Decode with Vocos
|
| 454 |
-
frames =
|
| 455 |
features = vocos.codes_to_features(frames)
|
| 456 |
samples = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
| 457 |
|
|
|
|
| 403 |
)
|
| 404 |
complete_tokens = torch.cat([complete_tokens, encoded_frames.transpose(2, 1)], dim=-1)
|
| 405 |
# Decode with Vocos
|
| 406 |
+
frames = complete_tokens.permute(1, 0, 2)
|
| 407 |
features = vocos.codes_to_features(frames)
|
| 408 |
samples = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
| 409 |
|
|
|
|
| 451 |
audio_prompts = original_audio_prompts
|
| 452 |
text_prompts = original_text_prompts
|
| 453 |
# Decode with Vocos
|
| 454 |
+
frames = complete_tokens.permute(1, 0, 2)
|
| 455 |
features = vocos.codes_to_features(frames)
|
| 456 |
samples = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
| 457 |
|