Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1114,16 +1114,20 @@ def generate_audio_parler_tts(text):
|
|
| 1114 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 1115 |
try:
|
| 1116 |
model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
|
| 1117 |
-
except
|
| 1118 |
-
print("
|
| 1119 |
-
|
| 1120 |
-
|
| 1121 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 1122 |
|
| 1123 |
description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
|
| 1124 |
|
| 1125 |
-
|
| 1126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1127 |
|
| 1128 |
max_input_length = model.config.n_positions - input_ids.shape[1]
|
| 1129 |
segments = [prompt_input_ids[0][i:i+max_input_length] for i in range(0, prompt_input_ids.shape[1], max_input_length)]
|
|
@@ -1131,7 +1135,12 @@ def generate_audio_parler_tts(text):
|
|
| 1131 |
audio_segments = []
|
| 1132 |
for segment in segments:
|
| 1133 |
segment = segment.unsqueeze(0)
|
| 1134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1135 |
audio_arr = generation.cpu().numpy().squeeze()
|
| 1136 |
audio_segments.append(audio_arr)
|
| 1137 |
|
|
@@ -1216,3 +1225,4 @@ demo.launch(share=True)
|
|
| 1216 |
|
| 1217 |
|
| 1218 |
|
|
|
|
|
|
| 1114 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 1115 |
try:
|
| 1116 |
model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
|
| 1117 |
+
except Exception as e:
|
| 1118 |
+
print(f"Error loading Parler TTS model: {e}")
|
| 1119 |
+
return None
|
| 1120 |
+
|
| 1121 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 1122 |
|
| 1123 |
description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
|
| 1124 |
|
| 1125 |
+
try:
|
| 1126 |
+
input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
|
| 1127 |
+
prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
|
| 1128 |
+
except Exception as e:
|
| 1129 |
+
print(f"Error tokenizing input: {e}")
|
| 1130 |
+
return None
|
| 1131 |
|
| 1132 |
max_input_length = model.config.n_positions - input_ids.shape[1]
|
| 1133 |
segments = [prompt_input_ids[0][i:i+max_input_length] for i in range(0, prompt_input_ids.shape[1], max_input_length)]
|
|
|
|
| 1135 |
audio_segments = []
|
| 1136 |
for segment in segments:
|
| 1137 |
segment = segment.unsqueeze(0)
|
| 1138 |
+
try:
|
| 1139 |
+
generation = model.generate(input_ids=input_ids, prompt_input_ids=segment)
|
| 1140 |
+
except Exception as e:
|
| 1141 |
+
print(f"Error generating audio segment: {e}")
|
| 1142 |
+
return None
|
| 1143 |
+
|
| 1144 |
audio_arr = generation.cpu().numpy().squeeze()
|
| 1145 |
audio_segments.append(audio_arr)
|
| 1146 |
|
|
|
|
| 1225 |
|
| 1226 |
|
| 1227 |
|
| 1228 |
+
|