Spaces:
Sleeping
Sleeping
Upload untitled.py
Browse files- untitled.py +215 -0
untitled.py
ADDED
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# -*- coding: utf-8 -*-
"""Untitled

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/12GhPKbBzxei0ZhB0r-m5kvNOaCRyCxiM
"""

# NOTE: the original Colab cell used IPython shell magics (`!pip ...`,
# `!apt-get ...`), which are a SyntaxError in a plain .py file.  Install the
# dependencies from a shell instead:
#   pip install gradio openai gtts pydub numpy requests groq openai-whisper transformers
#   apt-get install -y ffmpeg

import io
import os

import gradio as gr
import whisper
from gtts import gTTS
from groq import Groq
from transformers import pipeline

# SECURITY: a Groq API key was previously hard-coded on this line.  It has
# been removed — never commit secrets to source.  Supply the key via the
# environment instead, e.g.:  export GROQ_API_KEY=...
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Speech-to-text model.  Other sizes: "small", "medium", "large".
whisper_model = whisper.load_model("base")

# Text2text pipeline used to grammar-correct the raw transcript before it is
# sent to the chat model.
corrector = pipeline("text2text-generation", model="pszemraj/flan-t5-large-grammar-synthesis")
def process_audio(file_path):
    """Transcribe an uploaded audio file, grammar-correct it, and reply aloud.

    Pipeline: Whisper speech-to-text -> grammar-correction model -> Groq LLM
    chat completion -> gTTS text-to-speech.

    Parameters
    ----------
    file_path : str
        Path to the audio file provided by the Gradio ``Audio`` component
        (``type="filepath"``).

    Returns
    -------
    tuple
        ``(original transcript, corrected transcript, "response.mp3")`` on
        success, or ``(error message, None, None)`` on failure so the UI can
        surface the problem instead of crashing.
    """
    try:
        # Decode the file into the waveform format Whisper expects.
        audio = whisper.load_audio(file_path)

        # Speech -> text.
        result = whisper_model.transcribe(audio)
        user_text = result["text"]

        # Clean up grammar before handing the text to the LLM.
        corrected_text = corrector(user_text)[0]["generated_text"].strip()

        # Generate a conversational reply via the Groq-hosted model.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": corrected_text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )
        response_message = chat_completion.choices[0].message.content.strip()

        # Text -> speech.  gTTS writes the MP3 straight to disk, so the
        # BytesIO round-trip used previously is unnecessary.
        tts = gTTS(response_message)
        tts.save("response.mp3")

        # Original text, corrected text, and the path to the saved audio.
        return user_text, corrected_text, "response.mp3"

    except Exception as e:
        # Best-effort reporting: show the error in the first output textbox.
        return f"An error occurred: {e}", None, None
# Wire the handler into a Gradio UI: one audio input, two text outputs, and
# the synthesized spoken reply.  Submission is explicit (no live mode).
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # pass the handler a path on disk
    outputs=[
        gr.Textbox(label="User voice input into text"),
        gr.Textbox(label="Corrected version of user input"),
        gr.Audio(label="Response Audio"),
    ],
    live=False,  # require a Submit click rather than streaming input
    title="Audio Processing with Grammar Correction",
    description="Upload an audio file, which will be transcribed, corrected for grammar, and then used to generate a response.",
    allow_flagging="never",
)

iface.launch()
|
86 |
+
|
87 |
+
|
88 |
+
|
89 |
+
|
90 |
+
# import os
|
91 |
+
# import gradio as gr
|
92 |
+
# import whisper
|
93 |
+
# from gtts import gTTS
|
94 |
+
# import io
|
95 |
+
# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
96 |
+
# from groq import Groq
|
97 |
+
|
98 |
+
# # Initialize the Groq client
|
99 |
+
# client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
100 |
+
|
101 |
+
# # Load the Whisper model
|
102 |
+
# whisper_model = whisper.load_model("base") # You can choose other models like "small", "medium", "large"
|
103 |
+
|
104 |
+
# # Initialize the grammar correction pipeline
|
105 |
+
# corrector = pipeline("text2text-generation", model="pszemraj/flan-t5-large-grammar-synthesis")
|
106 |
+
|
107 |
+
# def process_audio(file_path):
|
108 |
+
# try:
|
109 |
+
# # Load the audio file
|
110 |
+
# audio = whisper.load_audio(file_path)
|
111 |
+
|
112 |
+
# # Transcribe the audio using Whisper
|
113 |
+
# result = whisper_model.transcribe(audio)
|
114 |
+
# user_text = result["text"]
|
115 |
+
|
116 |
+
# # Display the user input text
|
117 |
+
# corrected_text = corrector(user_text)[0]['generated_text'].strip()
|
118 |
+
|
119 |
+
# # Generate a response using Groq
|
120 |
+
# chat_completion = client.chat.completions.create(
|
121 |
+
# messages=[{"role": "user", "content": corrected_text}],
|
122 |
+
# model="llama3-8b-8192", # Replace with the correct model if necessary
|
123 |
+
# )
|
124 |
+
|
125 |
+
# # Access the response using dot notation
|
126 |
+
# response_message = chat_completion.choices[0].message.content.strip()
|
127 |
+
|
128 |
+
# # Convert the response text to speech
|
129 |
+
# tts = gTTS(response_message)
|
130 |
+
# response_audio_io = io.BytesIO()
|
131 |
+
# tts.write_to_fp(response_audio_io) # Save the audio to the BytesIO object
|
132 |
+
# response_audio_io.seek(0)
|
133 |
+
|
134 |
+
# # Save audio to a file to ensure it's generated correctly
|
135 |
+
# with open("response.mp3", "wb") as audio_file:
|
136 |
+
# audio_file.write(response_audio_io.getvalue())
|
137 |
+
|
138 |
+
# # Return the original text, corrected text, and the path to the saved audio file
|
139 |
+
# return user_text, corrected_text, "response.mp3"
|
140 |
+
|
141 |
+
# except Exception as e:
|
142 |
+
# return f"An error occurred: {e}", None, None
|
143 |
+
|
144 |
+
# iface = gr.Interface(
|
145 |
+
# fn=process_audio,
|
146 |
+
# inputs=gr.Audio(type="filepath"), # Use type="filepath"
|
147 |
+
# outputs=[
|
148 |
+
# gr.Textbox(label="User voice input into text"), # Original user input text
|
149 |
+
# gr.Textbox(label="Corrected version of user input"), # Corrected text
|
150 |
+
# gr.Audio(label="Response Audio") # Response audio
|
151 |
+
# ],
|
152 |
+
# live=True
|
153 |
+
# )
|
154 |
+
|
155 |
+
# iface.launch()
|
156 |
+
|
157 |
+
|
158 |
+
|
159 |
+
|
160 |
+
# # import os
|
161 |
+
# # import gradio as gr
|
162 |
+
# # import whisper
|
163 |
+
# # from gtts import gTTS
|
164 |
+
# # import io
|
165 |
+
# # from groq import Groq
|
166 |
+
|
167 |
+
# # # Initialize the Groq client
|
168 |
+
# # client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
169 |
+
|
170 |
+
# # # Load the Whisper model
|
171 |
+
# # model = whisper.load_model("base") # You can choose other models like "small", "medium", "large"
|
172 |
+
|
173 |
+
# # def process_audio(file_path):
|
174 |
+
# # try:
|
175 |
+
# # # Load the audio file
|
176 |
+
# # audio = whisper.load_audio(file_path)
|
177 |
+
|
178 |
+
# # # Transcribe the audio using Whisper
|
179 |
+
# # result = model.transcribe(audio)
|
180 |
+
# # text = result["text"]
|
181 |
+
|
182 |
+
# # # Generate a response using Groq
|
183 |
+
# # chat_completion = client.chat.completions.create(
|
184 |
+
# # messages=[{"role": "user", "content": text}],
|
185 |
+
# # model="llama3-8b-8192", # Replace with the correct model if necessary
|
186 |
+
# # )
|
187 |
+
|
188 |
+
# # # Access the response using dot notation
|
189 |
+
# # response_message = chat_completion.choices[0].message.content.strip()
|
190 |
+
|
191 |
+
# # # Convert the response text to speech
|
192 |
+
# # tts = gTTS(response_message)
|
193 |
+
# # response_audio_io = io.BytesIO()
|
194 |
+
# # tts.write_to_fp(response_audio_io) # Save the audio to the BytesIO object
|
195 |
+
# # response_audio_io.seek(0)
|
196 |
+
|
197 |
+
# # # Save audio to a file to ensure it's generated correctly
|
198 |
+
# # with open("response.mp3", "wb") as audio_file:
|
199 |
+
# # audio_file.write(response_audio_io.getvalue())
|
200 |
+
|
201 |
+
# # # Return the response text and the path to the saved audio file
|
202 |
+
# # return response_message, "response.mp3"
|
203 |
+
|
204 |
+
# # except Exception as e:
|
205 |
+
# # return f"An error occurred: {e}", None
|
206 |
+
|
207 |
+
# # iface = gr.Interface(
|
208 |
+
# # fn=process_audio,
|
209 |
+
# # inputs=gr.Audio(type="filepath"), # Use type="filepath"
|
210 |
+
# # outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
|
211 |
+
# # live=True
|
212 |
+
# # )
|
213 |
+
|
214 |
+
# # iface.launch()
|
215 |
+
|