from transformers import pipeline, VitsModel, AutoTokenizer
import torch
import os
from groq import Groq

# Transcription model
transcriber = pipeline("automatic-speech-recognition", model="SamuelM0422/whisper-small-pt")

# Synthesis (text-to-speech) model
model = VitsModel.from_pretrained("facebook/mms-tts-por")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-por")
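The MMS-TTS checkpoints generate audio at a fixed rate, and the hard-coded 16000 returned by ai_assistant further down has to match it. A small optional check, using nothing beyond the model object loaded above: the rate is stored on the model config.

# Optional sanity check (not part of the original code): the VITS config
# exposes the output sampling rate, 16000 Hz for facebook/mms-tts-por,
# which the sample rate returned by ai_assistant below must match.
print(model.config.sampling_rate)  # 16000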
# LLM query function
def query(text, groq_api_key):
    client = Groq(api_key=groq_api_key)
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "Answer the following question concisely and objectively. If there are numbers in the response, WRITE THEM IN WORDS.",
            },
            {
                "role": "user",
                "content": text,
            },
        ],
        model="llama-3.1-8b-instant",
    )
    return chat_completion.choices[0].message.content
# Synthesis function
def synthesise(text):
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        output = model(**inputs).waveform
    return output.cpu()
# Piecing it all together
def ai_assistant(filepath, groq_key):
    # Audio file -> transcription -> LLM response -> synthesised speech
    transcription = transcriber(filepath)
    response = query(transcription["text"], groq_key)
    audio_response = synthesise(response)
    # synthesise() already moved the waveform to the CPU, so only the batch
    # dimension needs squeezing; 16000 Hz is the MMS-TTS output rate
    return (16000, audio_response.squeeze().numpy()), response
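The (sampling_rate, numpy_array) tuple is the format Gradio's Audio component accepts as output, which suggests how the function is meant to be served in the Space. Below is a minimal sketch of wiring ai_assistant into a Gradio interface; the microphone input and component labels are assumptions, not part of the original code.

import gradio as gr

# Minimal Gradio wiring (an assumption: the original Space's UI may differ).
# gr.Audio(type="filepath") hands the recording to ai_assistant as a file
# path, matching the transcriber(filepath) call above.
demo = gr.Interface(
    fn=ai_assistant,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath", label="Question"),
        gr.Textbox(type="password", label="Groq API key"),
    ],
    outputs=[
        gr.Audio(label="Spoken answer"),
        gr.Textbox(label="Text answer"),
    ],
)

if __name__ == "__main__":
    demo.launch()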