ariankhalfani committed on
Commit
5bcaefa
·
verified ·
1 Parent(s): 709897b

Update LLMwithvoice.py

Browse files
Files changed (1) hide show
  1. LLMwithvoice.py +28 -14
LLMwithvoice.py CHANGED
@@ -1,10 +1,23 @@
1
  import requests
2
- from IPython.display import Audio
 
 
 
3
 
4
  # Hugging Face API URL for Roberta model
5
  API_URL_ROBERTA = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2"
6
- # Hugging Face API URL for text-to-speech model
7
- API_URL_TTS = "https://api-inference.huggingface.co/models/suno/bark"
 
 
 
 
 
 
 
 
 
 
8
 
9
  def query_roberta(api_token, payload):
10
  headers = {"Authorization": f"Bearer {api_token}"}
@@ -14,10 +27,15 @@ def query_roberta(api_token, payload):
14
  except ValueError:
15
  return {"error": "Invalid JSON response"}
16
 
17
- def query_tts(api_token, payload):
18
- headers = {"Authorization": f"Bearer {api_token}"}
19
- response = requests.post(API_URL_TTS, headers=headers, json=payload)
20
- return response.content
 
 
 
 
 
21
 
22
  def chat_with_roberta(api_token, question, context):
23
  payload = {
@@ -35,12 +53,8 @@ def chat_with_roberta(api_token, question, context):
35
  except (IndexError, KeyError):
36
  return f"Unexpected response structure: {response}"
37
 
38
- def generate_speech(api_token, text):
39
- payload = {"inputs": text}
40
- audio_bytes = query_tts(api_token, payload)
41
- return audio_bytes
42
-
43
  def gradio_interface(api_token, context, question):
44
  answer = chat_with_roberta(api_token, question, context)
45
- audio_bytes = generate_speech(api_token, answer)
46
- return answer, audio_bytes
 
 
1
  import requests
2
+ import torch
3
+ import soundfile as sf
4
+ from transformers import AutoTokenizer
5
+ from parler_tts import ParlerTTSForConditionalGeneration
6
 
7
  # Hugging Face API URL for Roberta model
8
  API_URL_ROBERTA = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2"
9
+
10
# Checkpoint used for both the Parler-TTS model and its tokenizer.
_TTS_CHECKPOINT = "parler-tts/parler_tts_mini_v0.1"

# Optional accelerator backends. Not every PyTorch build exposes
# `torch.backends.mps` or `torch.xpu`, so look them up defensively instead of
# dereferencing them directly (which would raise AttributeError at import time).
_mps_backend = getattr(torch.backends, "mps", None)
_xpu_backend = getattr(torch, "xpu", None)

# Device selection. The checks are deliberately sequential (not elif): a later
# match overrides an earlier one, so the precedence is xpu > mps > cuda > cpu.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda:0"
if _mps_backend is not None and _mps_backend.is_available():
    device = "mps"
if _xpu_backend is not None and _xpu_backend.is_available():
    device = "xpu"

# Half precision on any accelerator, full precision on CPU.
torch_dtype = torch.float16 if device != "cpu" else torch.float32

# Loaded once at import time and shared by every generate_speech() call.
model = ParlerTTSForConditionalGeneration.from_pretrained(_TTS_CHECKPOINT).to(device, dtype=torch_dtype)
tokenizer = AutoTokenizer.from_pretrained(_TTS_CHECKPOINT)
21
 
22
  def query_roberta(api_token, payload):
23
  headers = {"Authorization": f"Bearer {api_token}"}
 
27
  except ValueError:
28
  return {"error": "Invalid JSON response"}
29
 
30
def generate_speech(prompt, description):
    """Synthesize speech for `prompt` and return the path of the WAV file.

    Args:
        prompt: Text whose tokens are passed to the model as
            `prompt_input_ids`.
        description: Text whose tokens are passed to the model as
            `input_ids` (the caller in this file passes a voice/style
            description here).

    Returns:
        Path (str) of a newly created `.wav` file containing the generated
        audio, written at `model.config.sampling_rate`.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    # Cast to float32 before converting to NumPy; the model itself may be
    # running in float16 (see `torch_dtype`).
    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids).to(torch.float32)
    audio_arr = generation.cpu().numpy().squeeze()

    # Write to a unique temp file rather than a fixed "/mnt/data/..." path:
    # the fixed path fails when that directory does not exist, and concurrent
    # requests would overwrite each other's audio.
    # NOTE(review): the file is not deleted here; the consumer of the returned
    # path is responsible for cleanup.
    fd, audio_path = tempfile.mkstemp(prefix="parler_tts_", suffix=".wav")
    os.close(fd)  # soundfile reopens the file by path
    sf.write(audio_path, audio_arr, model.config.sampling_rate)
    return audio_path
39
 
40
  def chat_with_roberta(api_token, question, context):
41
  payload = {
 
53
  except (IndexError, KeyError):
54
  return f"Unexpected response structure: {response}"
55
 
 
 
 
 
 
56
  def gradio_interface(api_token, context, question):
57
  answer = chat_with_roberta(api_token, question, context)
58
+ description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
59
+ audio_path = generate_speech(answer, description)
60
+ return answer, audio_path