Spaces:

SamuelM0422
/

ai_assistant

Sleeping

SamuelM0422 committed 22 days ago

Commit

4ce999d

verified ·

1 Parent(s): e58af18

Upload 2 files

Files changed (2) hide show

app.py ADDED Viewed

+import gradio as gr
+from helper_functions import ai_assistant
+title = 'Ai Assistant 🤖'
+description = 'A cascade approach consisting of a text transcription model combined with an llm and a synthesizer to create an ai assistant'
+demo = gr.Interface(
+    fn=ai_assistant,
+    inputs=[gr.Audio(label='Command Input', sources=['microphone', 'upload'], type='filepath'), gr.Textbox(label='Groq API Key')],
+    outputs=[gr.Audio(label='Output', type='numpy'), gr.Textbox(label="Reponse")],
+    flagging_mode='never',
+    title=title,
+    description=description
+)
+demo.launch()

helper_functions.py ADDED Viewed

+from transformers import pipeline, VitsModel, AutoTokenizer
+import torch
+import os
+from groq import Groq
+# Transcriber model
+transcriber = pipeline("automatic-speech-recognition", model="SamuelM0422/whisper-small-pt")
+# Synthesise model
+model = VitsModel.from_pretrained("facebook/mms-tts-por")
+tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-por")
+# LLM query function
+def query(text, groq_api_key):
+  client = Groq(
+    api_key=groq_api_key,
+  )
+  chat_completion = client.chat.completions.create(
+      messages=[
+          {
+              'role': 'system',
+              'content': 'Answer the following question concisely and objectively. If there are numbers in the response, WRITE THEM IN WORDS.',
+          },
+          {
+              "role": "user",
+              "content": text,
+          }
+      ],
+      model="llama-3.1-8b-instant",
+  )
+  return chat_completion.choices[0].message.content
+# Synthesise function
+def synthesise(text):
+    inputs = tokenizer(text, return_tensors="pt")
+    with torch.no_grad():
+        output = model(**inputs).waveform
+    return output.cpu()
+# Piecing them all together
+def ai_assistant(filepath, groq_key):
+  transcription = transcriber(filepath)
+  response = query(transcription['text'], groq_key)
+  audio_response = synthesise(response)
+  return (16000, audio_response.squeeze().cpu().numpy()), response