SamuelM0422 committed on
Commit 4ce999d · verified · 1 Parent(s): e58af18

Upload 2 files

Files changed (2)
  1. app.py +16 -0
  2. helper_functions.py +49 -0
app.py ADDED
@@ -0,0 +1,16 @@
+ import gradio as gr
+ from helper_functions import ai_assistant
+
+ title = 'AI Assistant 🤖'
+ description = 'A cascade approach: a speech transcription model combined with an LLM and a speech synthesizer to create an AI assistant.'
+
+ demo = gr.Interface(
+     fn=ai_assistant,
+     inputs=[gr.Audio(label='Command Input', sources=['microphone', 'upload'], type='filepath'), gr.Textbox(label='Groq API Key')],
+     outputs=[gr.Audio(label='Output', type='numpy'), gr.Textbox(label='Response')],
+     flagging_mode='never',
+     title=title,
+     description=description
+ )
+
+ demo.launch()
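For context, the interface wiring above assumes ai_assistant returns a (sample_rate, numpy_array) tuple for the gr.Audio(type='numpy') output and a plain string for the Textbox. A minimal sketch of the equivalent direct call (the audio path and API key below are placeholders, not part of this commit):

from helper_functions import ai_assistant

(sample_rate, waveform), answer = ai_assistant('question.wav', 'gsk_...')  # placeholder path and key
print(sample_rate)     # 16000, the MMS TTS output rate
print(waveform.shape)  # 1-D numpy array of synthesized speech samples
print(answer)          # the LLM's text reply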
helper_functions.py ADDED
@@ -0,0 +1,49 @@
+ from transformers import pipeline, VitsModel, AutoTokenizer
+ import torch
+ from groq import Groq
+
+ # Transcription model (Whisper small, fine-tuned for Portuguese)
+ transcriber = pipeline("automatic-speech-recognition", model="SamuelM0422/whisper-small-pt")
+
+ # Speech synthesis model (MMS TTS, Portuguese)
+ model = VitsModel.from_pretrained("facebook/mms-tts-por")
+ tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-por")
+
+ # LLM query function
+ def query(text, groq_api_key):
+     client = Groq(
+         api_key=groq_api_key,
+     )
+
+     chat_completion = client.chat.completions.create(
+         messages=[
+             {
+                 'role': 'system',
+                 'content': 'Answer the following question concisely and objectively. If there are numbers in the response, WRITE THEM IN WORDS.',
+             },
+             {
+                 "role": "user",
+                 "content": text,
+             }
+         ],
+         model="llama-3.1-8b-instant",
+     )
+
+     return chat_completion.choices[0].message.content
+
+ # Speech synthesis function
+ def synthesise(text):
+     inputs = tokenizer(text, return_tensors="pt")
+     with torch.no_grad():
+         output = model(**inputs).waveform
+
+     return output.cpu()
+
+ # Piecing it all together: transcribe -> query the LLM -> synthesise the reply
+ def ai_assistant(filepath, groq_key):
+     transcription = transcriber(filepath)
+     response = query(transcription['text'], groq_key)
+     audio_response = synthesise(response)
+
+     # MMS TTS generates 16 kHz audio, so pair the waveform with that sample rate
+     return (16000, audio_response.squeeze().numpy()), response
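A quick smoke test of the three stages in isolation can help when debugging the cascade. This is a sketch: 'sample.wav' and the key are placeholders, and both models are downloaded from the Hugging Face Hub on first use.

from helper_functions import transcriber, query, synthesise

text = transcriber('sample.wav')['text']  # placeholder path; the ASR pipeline returns a dict with a 'text' key
reply = query(text, 'gsk_...')            # placeholder Groq API key
waveform = synthesise(reply)              # torch.Tensor of shape (1, num_samples) at 16 kHz
print(text, reply, waveform.shape)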