kokoro-onnx-api-test

Running

bcci committed on Feb 8

Commit

05eca7a

verified ·

1 Parent(s): e334297

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,6 +20,8 @@ from huggingface_hub import snapshot_download
 import json
 from scipy.io.wavfile import write as write_wav
 # Load the configuration file
 config_file_path = 'config.json'  # Update this with the path to your config file
@@ -243,16 +245,20 @@ def tts_full(text: str, voice: str = "af_heart", speed: float = 1.0, format: str
     final_token = [[0, *tokens, 0]]
     audio = sess.run(None, dict(
         input_ids=final_token,
         style=ref_s,
         speed=np.ones(1, dtype=np.float32),
     ))[0]
     # Write the concatenated audio to an in-memory WAV or Opus file.
     sample_rate = 24000
-    audio = np.array(audio, dtype=np.float32)  # Ensure it's float32 first
     audio = (audio * 32767).astype(np.int16)   # Scale to int16 range
     # Flatten the array if it's 2D

 import json
 from scipy.io.wavfile import write as write_wav
+import time
 # Load the configuration file
 config_file_path = 'config.json'  # Update this with the path to your config file
     final_token = [[0, *tokens, 0]]
+    start_time = time.time()
     audio = sess.run(None, dict(
         input_ids=final_token,
         style=ref_s,
         speed=np.ones(1, dtype=np.float32),
     ))[0]
+    print(time.time()-start_time)
     # Write the concatenated audio to an in-memory WAV or Opus file.
     sample_rate = 24000
+    # audio = np.array(audio, dtype=np.float32)  # Ensure it's float32 first
     audio = (audio * 32767).astype(np.int16)   # Scale to int16 range
     # Flatten the array if it's 2D