Spaces:
Runtime error
Runtime error
Update LLMwithvoice.py
Browse files- LLMwithvoice.py +18 -4
LLMwithvoice.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
import requests
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
-
import sounddevice as sd
|
5 |
from transformers import AutoTokenizer
|
6 |
from parler_tts import ParlerTTSForConditionalGeneration
|
|
|
|
|
7 |
|
8 |
# Hugging Face API URL for Roberta model
|
9 |
API_URL_ROBERTA = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2"
|
@@ -47,10 +48,23 @@ def generate_speech(answer):
|
|
47 |
generation = model.generate(input_ids=input_ids).to(torch.float32)
|
48 |
audio_arr = generation.cpu().numpy().squeeze()
|
49 |
|
50 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
try:
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
54 |
except Exception as e:
|
55 |
print(f"Error playing audio: {e}")
|
56 |
|
|
|
1 |
import requests
|
2 |
import torch
|
3 |
import numpy as np
|
|
|
4 |
from transformers import AutoTokenizer
|
5 |
from parler_tts import ParlerTTSForConditionalGeneration
|
6 |
+
from pydub import AudioSegment
|
7 |
+
import simpleaudio as sa
|
8 |
|
9 |
# Hugging Face API URL for Roberta model
|
10 |
API_URL_ROBERTA = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2"
|
|
|
48 |
generation = model.generate(input_ids=input_ids).to(torch.float32)
|
49 |
audio_arr = generation.cpu().numpy().squeeze()
|
50 |
|
51 |
+
# Convert numpy array to audio segment
|
52 |
+
audio_segment = AudioSegment(
|
53 |
+
audio_arr.tobytes(),
|
54 |
+
frame_rate=model.config.sampling_rate,
|
55 |
+
sample_width=audio_arr.dtype.itemsize,
|
56 |
+
channels=1
|
57 |
+
)
|
58 |
+
|
59 |
+
# Play the audio using simpleaudio
|
60 |
try:
|
61 |
+
play_obj = sa.play_buffer(
|
62 |
+
audio_segment.raw_data,
|
63 |
+
num_channels=1,
|
64 |
+
bytes_per_sample=audio_segment.sample_width,
|
65 |
+
sample_rate=audio_segment.frame_rate
|
66 |
+
)
|
67 |
+
play_obj.wait_done() # Wait until the audio is done playing
|
68 |
except Exception as e:
|
69 |
print(f"Error playing audio: {e}")
|
70 |
|