Pijush2023 committed on
Commit
8959efa
·
verified ·
1 Parent(s): 90468b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -1114,16 +1114,20 @@ def generate_audio_parler_tts(text):
1114
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
1115
  try:
1116
  model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
1117
- except torch.cuda.OutOfMemoryError:
1118
- print("CUDA out of memory. Switching to CPU.")
1119
- device = "cpu"
1120
- model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
1121
  tokenizer = AutoTokenizer.from_pretrained(model_id)
1122
 
1123
  description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
1124
 
1125
- input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
1126
- prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
 
 
 
 
1127
 
1128
  max_input_length = model.config.n_positions - input_ids.shape[1]
1129
  segments = [prompt_input_ids[0][i:i+max_input_length] for i in range(0, prompt_input_ids.shape[1], max_input_length)]
@@ -1131,7 +1135,12 @@ def generate_audio_parler_tts(text):
1131
  audio_segments = []
1132
  for segment in segments:
1133
  segment = segment.unsqueeze(0)
1134
- generation = model.generate(input_ids=input_ids, prompt_input_ids=segment)
 
 
 
 
 
1135
  audio_arr = generation.cpu().numpy().squeeze()
1136
  audio_segments.append(audio_arr)
1137
 
@@ -1216,3 +1225,4 @@ demo.launch(share=True)
1216
 
1217
 
1218
 
 
 
1114
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
1115
  try:
1116
  model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
1117
+ except Exception as e:
1118
+ print(f"Error loading Parler TTS model: {e}")
1119
+ return None
1120
+
1121
  tokenizer = AutoTokenizer.from_pretrained(model_id)
1122
 
1123
  description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
1124
 
1125
+ try:
1126
+ input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
1127
+ prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
1128
+ except Exception as e:
1129
+ print(f"Error tokenizing input: {e}")
1130
+ return None
1131
 
1132
  max_input_length = model.config.n_positions - input_ids.shape[1]
1133
  segments = [prompt_input_ids[0][i:i+max_input_length] for i in range(0, prompt_input_ids.shape[1], max_input_length)]
 
1135
  audio_segments = []
1136
  for segment in segments:
1137
  segment = segment.unsqueeze(0)
1138
+ try:
1139
+ generation = model.generate(input_ids=input_ids, prompt_input_ids=segment)
1140
+ except Exception as e:
1141
+ print(f"Error generating audio segment: {e}")
1142
+ return None
1143
+
1144
  audio_arr = generation.cpu().numpy().squeeze()
1145
  audio_segments.append(audio_arr)
1146
 
 
1225
 
1226
 
1227
 
1228
+