m-adil-ali committed
Commit 54b3eb0 · verified · 1 Parent(s): 5dfc284

Upload untitled.py

Files changed (1)
  1. untitled.py +215 -0
untitled.py ADDED
@@ -0,0 +1,215 @@
+ # -*- coding: utf-8 -*-
+ """Untitled
+
+ Automatically generated by Colab.
+
+ Original file is located at
+ https://colab.research.google.com/drive/12GhPKbBzxei0ZhB0r-m5kvNOaCRyCxiM
+ """
+
+ # Colab shell commands; when running this as a plain .py file, install the
+ # dependencies from your shell instead:
+ # !pip install gradio openai gtts pydub numpy requests groq openai-whisper transformers
+ # !apt-get install -y ffmpeg
+
+ import os
+
+ # Set your Groq API key; never commit a real key to source control.
+ os.environ["GROQ_API_KEY"] = "<YOUR_GROQ_API_KEY>"
+
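+ # Alternatively, a minimal sketch using the standard-library getpass, so the
+ # key is entered at runtime and never lands in source control:
+ # from getpass import getpass
+ # os.environ["GROQ_API_KEY"] = getpass("Groq API key: ")
+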
+ import gradio as gr
+ import whisper
+ from gtts import gTTS
+ import io
+ from transformers import pipeline
+ from groq import Groq
+
+ # Initialize the Groq client
+ client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+
+ # Load the Whisper model ("small", "medium", or "large" trade speed for accuracy)
+ whisper_model = whisper.load_model("base")
+
+ # Initialize the grammar correction pipeline
+ corrector = pipeline("text2text-generation", model="pszemraj/flan-t5-large-grammar-synthesis")
+
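+ # Quick sanity check of the corrector (illustrative sentence); the
+ # text2text-generation pipeline returns a list of dicts with a
+ # "generated_text" field:
+ # print(corrector("she no went to the market")[0]["generated_text"])
+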
+ def process_audio(file_path):
+     try:
+         # Load the audio file
+         audio = whisper.load_audio(file_path)
+
+         # Transcribe the audio using Whisper
+         result = whisper_model.transcribe(audio)
+         user_text = result["text"]
+
+         # Correct the transcript's grammar
+         corrected_text = corrector(user_text)[0]['generated_text'].strip()
+
+         # Generate a response to the corrected text using Groq
+         chat_completion = client.chat.completions.create(
+             messages=[{"role": "user", "content": corrected_text}],
+             model="llama3-8b-8192",  # Replace with a different Groq model if necessary
+         )
+
+         # Access the response using dot notation
+         response_message = chat_completion.choices[0].message.content.strip()
+
+         # Convert the response text to speech
+         tts = gTTS(response_message)
+         response_audio_io = io.BytesIO()
+         tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
+         response_audio_io.seek(0)
+
+         # Save the audio to a file so Gradio can serve it by path
+         with open("response.mp3", "wb") as audio_file:
+             audio_file.write(response_audio_io.getvalue())
+
+         # Return the original text, corrected text, and the saved audio path
+         return user_text, corrected_text, "response.mp3"
+
+     except Exception as e:
+         return f"An error occurred: {e}", None, None
+
+ # Create a Gradio interface with a submit button
+ iface = gr.Interface(
+     fn=process_audio,
+     inputs=gr.Audio(type="filepath"),  # the recording is passed in as a file path
+     outputs=[
+         gr.Textbox(label="User voice input into text"),       # original transcript
+         gr.Textbox(label="Corrected version of user input"),  # grammar-corrected text
+         gr.Audio(label="Response Audio")                      # spoken response
+     ],
+     live=False,  # live mode off so a submit button is shown
+     title="Audio Processing with Grammar Correction",
+     description="Upload an audio file, which will be transcribed, corrected for grammar, and then used to generate a response.",
+     allow_flagging="never"
+ )
+
+ iface.launch()
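+
+ # For a quick check without the UI, call process_audio directly; a minimal
+ # sketch assuming a local recording "sample.wav" (hypothetical filename):
+ # user_text, corrected_text, audio_path = process_audio("sample.wav")
+ # print(user_text, corrected_text, audio_path)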