Pijush2023 committed on
Commit
d8c5725
·
verified ·
1 Parent(s): 1472828

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -1
app.py CHANGED
@@ -812,6 +812,78 @@ def generate_audio_elevenlabs(text):
812
 
813
 
814
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
815
  import concurrent.futures
816
  import tempfile
817
  import os
@@ -820,8 +892,8 @@ import logging
820
  from queue import Queue
821
  from threading import Thread
822
  from scipy.io.wavfile import write as write_wav
823
- from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
824
  from transformers import AutoTokenizer
 
825
 
826
  # Ensure your device is set to CUDA
827
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
812
 
813
 
814
 
815
+ # import concurrent.futures
816
+ # import tempfile
817
+ # import os
818
+ # import numpy as np
819
+ # import logging
820
+ # from queue import Queue
821
+ # from threading import Thread
822
+ # from scipy.io.wavfile import write as write_wav
823
+ # from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
824
+ # from transformers import AutoTokenizer
825
+
826
+ # # Ensure your device is set to CUDA
827
+ # device = "cuda:0" if torch.cuda.is_available() else "cpu"
828
+
829
+ # repo_id = "parler-tts/parler-tts-mini-v1"
830
+
831
+ # def generate_audio_parler_tts(text):
832
+ # description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
833
+ # chunk_size_in_s = 0.5
834
+
835
+ # # Initialize the tokenizer and model
836
+ # parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
837
+ # parler_model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
838
+ # sampling_rate = parler_model.audio_encoder.config.sampling_rate
839
+ # frame_rate = parler_model.audio_encoder.config.frame_rate
840
+
841
+ # def generate(text, description, play_steps_in_s=0.5):
842
+ # play_steps = int(frame_rate * play_steps_in_s)
843
+ # streamer = ParlerTTSStreamer(parler_model, device=device, play_steps=play_steps)
844
+
845
+ # inputs = parler_tokenizer(description, return_tensors="pt").to(device)
846
+ # prompt = parler_tokenizer(text, return_tensors="pt").to(device)
847
+
848
+ # generation_kwargs = dict(
849
+ # input_ids=inputs.input_ids,
850
+ # prompt_input_ids=prompt.input_ids,
851
+ # attention_mask=inputs.attention_mask,
852
+ # prompt_attention_mask=prompt.attention_mask,
853
+ # streamer=streamer,
854
+ # do_sample=True,
855
+ # temperature=1.0,
856
+ # min_new_tokens=10,
857
+ # )
858
+
859
+ # thread = Thread(target=parler_model.generate, kwargs=generation_kwargs)
860
+ # thread.start()
861
+
862
+ # for new_audio in streamer:
863
+ # if new_audio.shape[0] == 0:
864
+ # break
865
+ # # Save or process each audio chunk as it is generated
866
+ # yield sampling_rate, new_audio
867
+
868
+ # audio_segments = []
869
+ # for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
870
+ # audio_segments.append(audio_chunk)
871
+
872
+ # temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_audio_chunk_{len(audio_segments)}.wav")
873
+ # write_wav(temp_audio_path, sampling_rate, audio_chunk.astype(np.float32))
874
+ # logging.debug(f"Saved chunk to {temp_audio_path}")
875
+
876
+
877
+ # # Combine all the audio chunks into one audio file
878
+ # combined_audio = np.concatenate(audio_segments)
879
+ # combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio_stream.wav")
880
+
881
+ # write_wav(combined_audio_path, sampling_rate, combined_audio.astype(np.float32))
882
+
883
+ # logging.debug(f"Combined audio saved to {combined_audio_path}")
884
+ # return combined_audio_path
885
+
886
+
887
  import concurrent.futures
888
  import tempfile
889
  import os
 
892
  from queue import Queue
893
  from threading import Thread
894
  from scipy.io.wavfile import write as write_wav
 
895
  from transformers import AutoTokenizer
896
+ from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
897
 
898
  # Ensure your device is set to CUDA
899
  device = "cuda:0" if torch.cuda.is_available() else "cpu"