Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -1114,16 +1114,20 @@ def generate_audio_parler_tts(text):
|
|
1114 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
1115 |
try:
|
1116 |
model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
|
1117 |
-
except
|
1118 |
-
print("
|
1119 |
-
|
1120 |
-
|
1121 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
1122 |
|
1123 |
description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
|
1124 |
|
1125 |
-
|
1126 |
-
|
|
|
|
|
|
|
|
|
1127 |
|
1128 |
max_input_length = model.config.n_positions - input_ids.shape[1]
|
1129 |
segments = [prompt_input_ids[0][i:i+max_input_length] for i in range(0, prompt_input_ids.shape[1], max_input_length)]
|
@@ -1131,7 +1135,12 @@ def generate_audio_parler_tts(text):
|
|
1131 |
audio_segments = []
|
1132 |
for segment in segments:
|
1133 |
segment = segment.unsqueeze(0)
|
1134 |
-
|
|
|
|
|
|
|
|
|
|
|
1135 |
audio_arr = generation.cpu().numpy().squeeze()
|
1136 |
audio_segments.append(audio_arr)
|
1137 |
|
@@ -1216,3 +1225,4 @@ demo.launch(share=True)
|
|
1216 |
|
1217 |
|
1218 |
|
|
|
|
1114 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
1115 |
try:
|
1116 |
model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
|
1117 |
+
except Exception as e:
|
1118 |
+
print(f"Error loading Parler TTS model: {e}")
|
1119 |
+
return None
|
1120 |
+
|
1121 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
1122 |
|
1123 |
description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
|
1124 |
|
1125 |
+
try:
|
1126 |
+
input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
|
1127 |
+
prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
|
1128 |
+
except Exception as e:
|
1129 |
+
print(f"Error tokenizing input: {e}")
|
1130 |
+
return None
|
1131 |
|
1132 |
max_input_length = model.config.n_positions - input_ids.shape[1]
|
1133 |
segments = [prompt_input_ids[0][i:i+max_input_length] for i in range(0, prompt_input_ids.shape[1], max_input_length)]
|
|
|
1135 |
audio_segments = []
|
1136 |
for segment in segments:
|
1137 |
segment = segment.unsqueeze(0)
|
1138 |
+
try:
|
1139 |
+
generation = model.generate(input_ids=input_ids, prompt_input_ids=segment)
|
1140 |
+
except Exception as e:
|
1141 |
+
print(f"Error generating audio segment: {e}")
|
1142 |
+
return None
|
1143 |
+
|
1144 |
audio_arr = generation.cpu().numpy().squeeze()
|
1145 |
audio_segments.append(audio_arr)
|
1146 |
|
|
|
1225 |
|
1226 |
|
1227 |
|
1228 |
+
|