ariankhalfani committed on
Commit
4b144cc
·
verified ·
1 Parent(s): b94b806

Update LLMwithvoice.py

Browse files
Files changed (1) hide show
  1. LLMwithvoice.py +18 -4
LLMwithvoice.py CHANGED
@@ -1,9 +1,10 @@
1
  import requests
2
  import torch
3
  import numpy as np
4
- import sounddevice as sd
5
  from transformers import AutoTokenizer
6
  from parler_tts import ParlerTTSForConditionalGeneration
 
 
7
 
8
  # Hugging Face API URL for Roberta model
9
  API_URL_ROBERTA = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2"
@@ -47,10 +48,23 @@ def generate_speech(answer):
47
  generation = model.generate(input_ids=input_ids).to(torch.float32)
48
  audio_arr = generation.cpu().numpy().squeeze()
49
 
50
- # Play the generated audio
 
 
 
 
 
 
 
 
51
  try:
52
- sd.play(audio_arr, samplerate=model.config.sampling_rate)
53
- sd.wait() # Wait until the audio is done playing
 
 
 
 
 
54
  except Exception as e:
55
  print(f"Error playing audio: {e}")
56
 
 
1
  import requests
2
  import torch
3
  import numpy as np
 
4
  from transformers import AutoTokenizer
5
  from parler_tts import ParlerTTSForConditionalGeneration
6
+ from pydub import AudioSegment
7
+ import simpleaudio as sa
8
 
9
  # Hugging Face API URL for Roberta model
10
  API_URL_ROBERTA = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2"
 
48
  generation = model.generate(input_ids=input_ids).to(torch.float32)
49
  audio_arr = generation.cpu().numpy().squeeze()
50
 
51
+ # Convert numpy array to audio segment
52
+ audio_segment = AudioSegment(
53
+ audio_arr.tobytes(),
54
+ frame_rate=model.config.sampling_rate,
55
+ sample_width=audio_arr.dtype.itemsize,
56
+ channels=1
57
+ )
58
+
59
+ # Play the audio using simpleaudio
60
  try:
61
+ play_obj = sa.play_buffer(
62
+ audio_segment.raw_data,
63
+ num_channels=1,
64
+ bytes_per_sample=audio_segment.sample_width,
65
+ sample_rate=audio_segment.frame_rate
66
+ )
67
+ play_obj.wait_done() # Wait until the audio is done playing
68
  except Exception as e:
69
  print(f"Error playing audio: {e}")
70