Spaces:
Sleeping
Sleeping
Upload untitled.py
Browse files- untitled.py +215 -0
untitled.py
ADDED
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# -*- coding: utf-8 -*-
"""Untitled

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/12GhPKbBzxei0ZhB0r-m5kvNOaCRyCxiM
"""

# NOTE: the original Colab cell used IPython shell magics (`!pip ...`,
# `!apt-get ...`), which are a SyntaxError in a plain .py file.  Install the
# dependencies from a shell instead:
#   pip install gradio openai gtts pydub numpy requests groq openai-whisper transformers
#   apt-get install -y ffmpeg

import io
import os

import gradio as gr
import whisper
from gtts import gTTS
from groq import Groq
from transformers import pipeline

# SECURITY: a Groq API key was previously hard-coded on this line.  It has
# been removed — never commit secrets to source.  Supply the key via the
# environment instead, e.g.:  export GROQ_API_KEY=...
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Speech-to-text model.  Other sizes: "small", "medium", "large".
whisper_model = whisper.load_model("base")

# Text2text pipeline used to grammar-correct the raw transcript before it is
# sent to the chat model.
corrector = pipeline("text2text-generation", model="pszemraj/flan-t5-large-grammar-synthesis")
def process_audio(file_path):
    """Transcribe an uploaded audio file, grammar-correct it, and reply aloud.

    Pipeline: Whisper speech-to-text -> grammar-correction model -> Groq LLM
    chat completion -> gTTS text-to-speech.

    Parameters
    ----------
    file_path : str
        Path to the audio file provided by the Gradio ``Audio`` component
        (``type="filepath"``).

    Returns
    -------
    tuple
        ``(original transcript, corrected transcript, "response.mp3")`` on
        success, or ``(error message, None, None)`` on failure so the UI can
        surface the problem instead of crashing.
    """
    try:
        # Decode the file into the waveform format Whisper expects.
        audio = whisper.load_audio(file_path)

        # Speech -> text.
        result = whisper_model.transcribe(audio)
        user_text = result["text"]

        # Clean up grammar before handing the text to the LLM.
        corrected_text = corrector(user_text)[0]["generated_text"].strip()

        # Generate a conversational reply via the Groq-hosted model.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": corrected_text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )
        response_message = chat_completion.choices[0].message.content.strip()

        # Text -> speech.  gTTS writes the MP3 straight to disk, so the
        # BytesIO round-trip used previously is unnecessary.
        tts = gTTS(response_message)
        tts.save("response.mp3")

        # Original text, corrected text, and the path to the saved audio.
        return user_text, corrected_text, "response.mp3"

    except Exception as e:
        # Best-effort reporting: show the error in the first output textbox.
        return f"An error occurred: {e}", None, None
# Wire the handler into a Gradio UI: one audio input, two text outputs, and
# the synthesized spoken reply.  Submission is explicit (no live mode).
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # pass the handler a path on disk
    outputs=[
        gr.Textbox(label="User voice input into text"),
        gr.Textbox(label="Corrected version of user input"),
        gr.Audio(label="Response Audio"),
    ],
    live=False,  # require a Submit click rather than streaming input
    title="Audio Processing with Grammar Correction",
    description="Upload an audio file, which will be transcribed, corrected for grammar, and then used to generate a response.",
    allow_flagging="never",
)

iface.launch()
|
86 |
+
|
87 |
+
|
88 |
+
|
89 |
+
|
90 |
+
# import os
|
91 |
+
# import gradio as gr
|
92 |
+
# import whisper
|
93 |
+
# from gtts import gTTS
|
94 |
+
# import io
|
95 |
+
# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
96 |
+
# from groq import Groq
|
97 |
+
|
98 |
+
# # Initialize the Groq client
|
99 |
+
# client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
100 |
+
|
101 |
+
# # Load the Whisper model
|
102 |
+
# whisper_model = whisper.load_model("base") # You can choose other models like "small", "medium", "large"
|
103 |
+
|
104 |
+
# # Initialize the grammar correction pipeline
|
105 |
+
# corrector = pipeline("text2text-generation", model="pszemraj/flan-t5-large-grammar-synthesis")
|
106 |
+
|
107 |
+
# def process_audio(file_path):
|
108 |
+
# try:
|
109 |
+
# # Load the audio file
|
110 |
+
# audio = whisper.load_audio(file_path)
|
111 |
+
|
112 |
+
# # Transcribe the audio using Whisper
|
113 |
+
# result = whisper_model.transcribe(audio)
|
114 |
+
# user_text = result["text"]
|
115 |
+
|
116 |
+
# # Display the user input text
|
117 |
+
# corrected_text = corrector(user_text)[0]['generated_text'].strip()
|
118 |
+
|
119 |
+
# # Generate a response using Groq
|
120 |
+
# chat_completion = client.chat.completions.create(
|
121 |
+
# messages=[{"role": "user", "content": corrected_text}],
|
122 |
+
# model="llama3-8b-8192", # Replace with the correct model if necessary
|
123 |
+
# )
|
124 |
+
|
125 |
+
# # Access the response using dot notation
|
126 |
+
# response_message = chat_completion.choices[0].message.content.strip()
|
127 |
+
|
128 |
+
# # Convert the response text to speech
|
129 |
+
# tts = gTTS(response_message)
|
130 |
+
# response_audio_io = io.BytesIO()
|
131 |
+
# tts.write_to_fp(response_audio_io) # Save the audio to the BytesIO object
|
132 |
+
# response_audio_io.seek(0)
|
133 |
+
|
134 |
+
# # Save audio to a file to ensure it's generated correctly
|
135 |
+
# with open("response.mp3", "wb") as audio_file:
|
136 |
+
# audio_file.write(response_audio_io.getvalue())
|
137 |
+
|
138 |
+
# # Return the original text, corrected text, and the path to the saved audio file
|
139 |
+
# return user_text, corrected_text, "response.mp3"
|
140 |
+
|
141 |
+
# except Exception as e:
|
142 |
+
# return f"An error occurred: {e}", None, None
|
143 |
+
|
144 |
+
# iface = gr.Interface(
|
145 |
+
# fn=process_audio,
|
146 |
+
# inputs=gr.Audio(type="filepath"), # Use type="filepath"
|
147 |
+
# outputs=[
|
148 |
+
# gr.Textbox(label="User voice input into text"), # Original user input text
|
149 |
+
# gr.Textbox(label="Corrected version of user input"), # Corrected text
|
150 |
+
# gr.Audio(label="Response Audio") # Response audio
|
151 |
+
# ],
|
152 |
+
# live=True
|
153 |
+
# )
|
154 |
+
|
155 |
+
# iface.launch()
|
156 |
+
|
157 |
+
|
158 |
+
|
159 |
+
|
160 |
+
# # import os
|
161 |
+
# # import gradio as gr
|
162 |
+
# # import whisper
|
163 |
+
# # from gtts import gTTS
|
164 |
+
# # import io
|
165 |
+
# # from groq import Groq
|
166 |
+
|
167 |
+
# # # Initialize the Groq client
|
168 |
+
# # client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
169 |
+
|
170 |
+
# # # Load the Whisper model
|
171 |
+
# # model = whisper.load_model("base") # You can choose other models like "small", "medium", "large"
|
172 |
+
|
173 |
+
# # def process_audio(file_path):
|
174 |
+
# # try:
|
175 |
+
# # # Load the audio file
|
176 |
+
# # audio = whisper.load_audio(file_path)
|
177 |
+
|
178 |
+
# # # Transcribe the audio using Whisper
|
179 |
+
# # result = model.transcribe(audio)
|
180 |
+
# # text = result["text"]
|
181 |
+
|
182 |
+
# # # Generate a response using Groq
|
183 |
+
# # chat_completion = client.chat.completions.create(
|
184 |
+
# # messages=[{"role": "user", "content": text}],
|
185 |
+
# # model="llama3-8b-8192", # Replace with the correct model if necessary
|
186 |
+
# # )
|
187 |
+
|
188 |
+
# # # Access the response using dot notation
|
189 |
+
# # response_message = chat_completion.choices[0].message.content.strip()
|
190 |
+
|
191 |
+
# # # Convert the response text to speech
|
192 |
+
# # tts = gTTS(response_message)
|
193 |
+
# # response_audio_io = io.BytesIO()
|
194 |
+
# # tts.write_to_fp(response_audio_io) # Save the audio to the BytesIO object
|
195 |
+
# # response_audio_io.seek(0)
|
196 |
+
|
197 |
+
# # # Save audio to a file to ensure it's generated correctly
|
198 |
+
# # with open("response.mp3", "wb") as audio_file:
|
199 |
+
# # audio_file.write(response_audio_io.getvalue())
|
200 |
+
|
201 |
+
# # # Return the response text and the path to the saved audio file
|
202 |
+
# # return response_message, "response.mp3"
|
203 |
+
|
204 |
+
# # except Exception as e:
|
205 |
+
# # return f"An error occurred: {e}", None
|
206 |
+
|
207 |
+
# # iface = gr.Interface(
|
208 |
+
# # fn=process_audio,
|
209 |
+
# # inputs=gr.Audio(type="filepath"), # Use type="filepath"
|
210 |
+
# # outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
|
211 |
+
# # live=True
|
212 |
+
# # )
|
213 |
+
|
214 |
+
# # iface.launch()
|
215 |
+
|