Coco-18 committed on
Commit
168acfa
·
verified ·
1 Parent(s): 6aa3d97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -10
app.py CHANGED
@@ -38,7 +38,6 @@ for lang, path in MODELS.items():
38
  loaded_processors[lang] = None
39
 
40
  # Constants
41
- SAMPLE_RATE = 16000
42
  OUTPUT_DIR = "/tmp/"
43
  os.makedirs(OUTPUT_DIR, exist_ok=True)
44
 
@@ -71,19 +70,16 @@ def generate_tts():
71
  model = loaded_models[language]
72
  inputs = processor(text_input, return_tensors="pt")
73
 
74
- # Generate speech
75
  with torch.no_grad():
76
- output = model.generate(**inputs)
77
- # For VITS models, the output is typically a waveform
78
- # Check if output is a tuple/list or a single tensor
79
- if isinstance(output, tuple) or isinstance(output, list):
80
- waveform = output[0].cpu().numpy().squeeze()
81
- else:
82
- waveform = output.cpu().numpy().squeeze()
83
 
84
  # Save to file
85
  output_filename = os.path.join(OUTPUT_DIR, f"{language}_output.wav")
86
- sf.write(output_filename, waveform, SAMPLE_RATE)
 
 
87
  print(f"✅ Speech generated! File saved: {output_filename}")
88
 
89
  return jsonify({
 
38
  loaded_processors[lang] = None
39
 
40
  # Constants
 
41
  OUTPUT_DIR = "/tmp/"
42
  os.makedirs(OUTPUT_DIR, exist_ok=True)
43
 
 
70
  model = loaded_models[language]
71
  inputs = processor(text_input, return_tensors="pt")
72
 
73
+ # Generate speech - using model(**inputs) instead of model.generate()
74
  with torch.no_grad():
75
+ output = model(**inputs).waveform
76
+ waveform = output.squeeze().cpu().numpy()
 
 
 
 
 
77
 
78
  # Save to file
79
  output_filename = os.path.join(OUTPUT_DIR, f"{language}_output.wav")
80
+ # Use the model's sampling rate
81
+ sampling_rate = model.config.sampling_rate
82
+ sf.write(output_filename, waveform, sampling_rate)
83
  print(f"✅ Speech generated! File saved: {output_filename}")
84
 
85
  return jsonify({