# -*- coding: utf-8 -*-
"""Untitled

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/12GhPKbBzxei0ZhB0r-m5kvNOaCRyCxiM
"""
!pip install gradio openai gtts pydub numpy requests groq openai-whisper transformers
!apt-get install -y ffmpeg
import os

# Do not hard-code a real API key in a shared notebook; use a placeholder here
# and supply the actual key through an environment variable or a secret store.
os.environ["GROQ_API_KEY"] = "YOUR_GROQ_API_KEY"  # placeholder, not a real key
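# Optional alternative for Colab (a sketch): store the key in the Colab
# "Secrets" panel and load it at runtime, so the raw key never appears in the
# notebook. The secret name GROQ_API_KEY is an assumption.
try:
    from google.colab import userdata  # only importable inside Colab
    os.environ["GROQ_API_KEY"] = userdata.get("GROQ_API_KEY")
except Exception:
    pass  # not running in Colab, or the secret is not set; keep the value above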
import io

import gradio as gr
import whisper
from gtts import gTTS
from transformers import pipeline
from groq import Groq

# Initialize the Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load the Whisper model ("small", "medium", or "large" are also available)
whisper_model = whisper.load_model("base")

# Initialize the grammar-correction pipeline
corrector = pipeline("text2text-generation", model="pszemraj/flan-t5-large-grammar-synthesis")
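# Optional sanity check (a minimal sketch; the sample sentence is only
# illustrative). The text2text-generation pipeline returns a list of dicts with
# a "generated_text" key, which is exactly what process_audio() reads below.
print(corrector("he go to school yesterday")[0]["generated_text"])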
def process_audio(file_path):
    try:
        # Load and transcribe the audio file with Whisper
        audio = whisper.load_audio(file_path)
        result = whisper_model.transcribe(audio)
        user_text = result["text"]

        # Grammar-correct the transcription
        corrected_text = corrector(user_text)[0]["generated_text"].strip()

        # Generate a response using Groq
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": corrected_text}],
            model="llama3-8b-8192",  # replace with another Groq model if necessary
        )
        response_message = chat_completion.choices[0].message.content.strip()

        # Convert the response text to speech
        tts = gTTS(response_message)
        response_audio_io = io.BytesIO()
        tts.write_to_fp(response_audio_io)  # write the MP3 bytes to the BytesIO object
        response_audio_io.seek(0)

        # Save the audio to a file so Gradio can serve it
        with open("response.mp3", "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        # Return the original text, the corrected text, and the saved audio path
        return user_text, corrected_text, "response.mp3"
    except Exception as e:
        return f"An error occurred: {e}", None, None
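# Optional quick test of the pipeline without the web UI. This is a minimal
# sketch; "sample.wav" is a hypothetical local file, so the block is simply
# skipped when no such file exists.
if os.path.exists("sample.wav"):
    text, corrected, audio_path = process_audio("sample.wav")
    print("Transcript:", text)
    print("Corrected:", corrected)
    print("Response audio:", audio_path)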
# Create a Gradio interface with a submit button
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # pass the uploaded file's path to process_audio
    outputs=[
        gr.Textbox(label="User voice input as text"),         # raw transcription
        gr.Textbox(label="Corrected version of user input"),  # grammar-corrected text
        gr.Audio(label="Response Audio"),                      # spoken response
    ],
    live=False,  # keep live mode off so a submit button is shown
    title="Audio Processing with Grammar Correction",
    description="Upload an audio file; it will be transcribed, grammar-corrected, and used to generate a spoken response.",
    allow_flagging="never",
)

iface.launch()
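# Optional variants (sketches, not required): with recent Gradio 4.x releases
# the input component can also accept microphone recordings in addition to
# uploads, e.g.
#   gr.Audio(sources=["microphone", "upload"], type="filepath")
# and in Colab, iface.launch(share=True) prints a public link if the inline
# view does not render.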