File size: 2,578 Bytes
9f5785e
5bcaefa
a5f7b84
5bcaefa
 
4b144cc
 
9f5785e
 
 
5bcaefa
 
 
 
 
 
 
 
 
 
 
 
9f5785e
0b90662
 
 
 
 
 
 
9f5785e
 
 
a5f7b84
 
 
 
 
 
 
 
 
 
9f5785e
0b90662
 
5bcaefa
0b90662
5bcaefa
5ddeb31
4b144cc
 
 
 
 
 
 
 
 
5ddeb31
4b144cc
 
 
 
 
 
 
5ddeb31
a5f7b84
5ddeb31
0b90662
 
a5f7b84
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import requests
import torch
import numpy as np
from transformers import AutoTokenizer
from parler_tts import ParlerTTSForConditionalGeneration
from pydub import AudioSegment
import simpleaudio as sa

# Hugging Face Inference API endpoint for the deepset/roberta-base-squad2
# question-answering model.
API_URL_ROBERTA = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2"

# Pick the compute device. Precedence is xpu > mps > cuda > cpu.
# The optional backends are looked up with getattr because not every PyTorch
# build exposes `torch.xpu` / `torch.backends.mps`; accessing them directly
# would raise AttributeError at import time on such builds.
_xpu_backend = getattr(torch, "xpu", None)
_mps_backend = getattr(torch.backends, "mps", None)

if _xpu_backend is not None and _xpu_backend.is_available():
    device = "xpu"
elif _mps_backend is not None and _mps_backend.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Half precision on any accelerator, full precision on CPU.
torch_dtype = torch.float16 if device != "cpu" else torch.float32

# Load the Parler-TTS checkpoint and its tokenizer once, at import time, and
# move the model to the selected device / dtype.
_PARLER_CHECKPOINT = "parler-tts/parler_tts_mini_v0.1"

model = ParlerTTSForConditionalGeneration.from_pretrained(_PARLER_CHECKPOINT)
model = model.to(device, dtype=torch_dtype)
tokenizer = AutoTokenizer.from_pretrained(_PARLER_CHECKPOINT)

def query_roberta(api_token, prompt, context, timeout=30):
    """Ask the hosted RoBERTa question-answering model a question.

    Args:
        api_token: Hugging Face API token, sent as a Bearer credential.
        prompt: The question to answer.
        context: The passage the answer should be extracted from.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON object returned by the API on success, otherwise a
        dict of the form ``{"error": message}``. Network failures, undecodable
        bodies and API-reported errors are all reported through that dict
        rather than raised.
    """
    payload = {"inputs": {"question": prompt, "context": context}}
    headers = {"Authorization": f"Bearer {api_token}"}

    # The request itself can fail (DNS, connection reset, timeout); keep that
    # inside the error-dict contract instead of letting it propagate.
    try:
        response = requests.post(
            API_URL_ROBERTA,
            headers=headers,
            json=payload,
            timeout=timeout,
        )
    except requests.RequestException as e:
        print(f"Exception: {e}")
        return {"error": f"Request failed: {e}"}

    # A JSON decoding failure is a ValueError subclass.
    try:
        response_json = response.json()
    except ValueError as e:
        print(f"ValueError: {e}")
        return {"error": str(e)}

    # Callers use dict operations on the result, so anything else is an error.
    if not isinstance(response_json, dict):
        print(f"Exception: unexpected response type {type(response_json).__name__}")
        return {"error": "An unexpected error occurred"}

    if 'error' in response_json:
        message = str(response_json['error'])
        print(f"ValueError: {message}")
        return {"error": message}

    return response_json

# Default voice description used to condition the TTS model.
_DEFAULT_VOICE_DESCRIPTION = (
    "A female speaker delivers her words clearly at a moderate pace, "
    "in a quiet environment with very clear audio quality."
)


def generate_speech(answer, description=_DEFAULT_VOICE_DESCRIPTION):
    """Synthesize *answer* with Parler-TTS and play it on the local audio device.

    Args:
        answer: The text to speak. Empty or whitespace-only text is skipped.
        description: Natural-language description of the desired voice. It is
            tokenized and passed to the model as ``input_ids``; the text to
            speak is passed separately as ``prompt_input_ids``.

    Returns:
        None. Playback errors are printed rather than raised.
    """
    # Nothing to say: avoid running an expensive generation on an empty prompt.
    if not answer or not answer.strip():
        return None

    # Parler-TTS conditions on the voice description via `input_ids` and reads
    # the transcript from `prompt_input_ids`.
    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    prompt_input_ids = tokenizer(answer, return_tensors="pt").input_ids.to(device)

    generation = model.generate(
        input_ids=input_ids,
        prompt_input_ids=prompt_input_ids,
    ).to(torch.float32)
    audio_arr = generation.cpu().numpy().squeeze()

    # Wrap the raw sample bytes in an audio segment.
    # NOTE(review): samples are float32 (4 bytes each) and are passed through
    # unconverted; confirm the playback backend treats 4-byte samples as float.
    audio_segment = AudioSegment(
        audio_arr.tobytes(),
        frame_rate=model.config.sampling_rate,
        sample_width=audio_arr.dtype.itemsize,
        channels=1
    )

    # Play the audio using simpleaudio
    try:
        play_obj = sa.play_buffer(
            audio_segment.raw_data,
            num_channels=1,
            bytes_per_sample=audio_segment.sample_width,
            sample_rate=audio_segment.frame_rate
        )
        play_obj.wait_done()  # Block until playback has finished
    except Exception as e:
        # Best effort: a missing or busy audio device must not abort the caller.
        print(f"Error playing audio: {e}")
    return None
def gradio_interface(api_token, prompt, context):
    """Answer a question about *context* and speak the answer aloud.

    Args:
        api_token: Hugging Face API token forwarded to ``query_roberta``.
        prompt: The question to answer.
        context: The passage to extract the answer from.

    Returns:
        A ``(text, None)`` tuple. ``text`` is the error message if the query
        failed, the extracted answer if present, or ``'No answer found'`` when
        the response has no ``'answer'`` key. The second element is always
        ``None``.
    """
    result = query_roberta(api_token, prompt, context)
    if 'error' in result:
        return result['error'], None

    # Only synthesize speech when there is actual text to speak.
    spoken_text = result.get('answer', '')
    if spoken_text:
        generate_speech(spoken_text)

    return result.get('answer', 'No answer found'), None