GoodML committed on
Commit
c139644
·
verified ·
1 Parent(s): 3e72eb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -58
app.py CHANGED
@@ -151,9 +151,7 @@
151
 
152
 
153
  # Above code is without polling and sleep
154
-
155
  import os
156
- import subprocess
157
  import whisper
158
  import requests
159
  from flask import Flask, request, jsonify, render_template
@@ -161,6 +159,7 @@ import tempfile
161
 
162
  app = Flask(__name__)
163
  print("APP IS RUNNING, ANIKET")
 
164
  # Gemini API settings
165
  from dotenv import load_dotenv
166
  # Load the .env file
@@ -194,39 +193,31 @@ def health_check():
194
  def mbsa():
195
  return render_template("mbsa.html")
196
 
197
- @app.route('/process-video', methods=['POST'])
198
- def process_video():
199
- print("GOT THE PROCESS VIDEO REQUEST, ANIKET")
200
  """
201
- Flask endpoint to process video:
202
- 1. Extract audio and transcribe using Whisper AI.
203
  2. Send transcription to Gemini API for recipe information extraction.
204
  3. Return structured data in the response.
205
  """
206
 
207
- if 'video' not in request.files:
208
- return jsonify({"error": "No video file provided"}), 400
209
 
210
- video_file = request.files['video']
211
- print("VIDEO FILE NAME: ", video_file)
212
  try:
213
  print("SAVING THE FILE TEMPO, ANIKET")
214
- # Step 1: Save video to a temporary file
215
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
216
- video_file.save(temp_video_file.name)
217
- print(f"Video file saved: {temp_video_file.name}")
218
 
219
-
220
- # Step 2: Extract audio from video using ffmpeg (waiting for completion)
221
- audio_path = extract_audio(temp_video_file.name)
222
- print("AUDIO PATH FROM LINE 221, ANIKET", audio_path)
223
-
224
- if not audio_path:
225
- return jsonify({"error": "Audio extraction failed"}), 500
226
-
227
- print("STARTING TRANSCRIPTION, GOT THE .WAV AUDIO PATH THAT WAS STORED TEMPO, ANIKET")
228
- # Step 3: Transcribe the audio using Whisper AI (waiting for completion)
229
- transcription = transcribe_audio(audio_path)
230
 
231
  print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
232
 
@@ -236,11 +227,11 @@ def process_video():
236
  print("GOT THE transcription")
237
 
238
  print("Starting the GEMINI REQUEST TO STRUCTURE IT")
239
- # Step 4: Generate structured recipe information using Gemini API (waiting for completion)
240
  structured_data = query_gemini_api(transcription)
241
 
242
  print("GOT THE STRUCTURED DATA", structured_data)
243
- # Step 5: Return the structured data
244
  return jsonify(structured_data)
245
 
246
  except Exception as e:
@@ -248,42 +239,16 @@ def process_video():
248
 
249
  finally:
250
  # Clean up temporary files
251
- if os.path.exists(temp_video_file.name):
252
- os.remove(temp_video_file.name)
253
-
254
-
255
- def extract_audio(video_path):
256
- """
257
- Extract audio from video using ffmpeg and save as WAV file.
258
- """
259
- try:
260
- # Define the audio output path
261
- audio_path = video_path.replace(".mp4", ".wav")
262
- command = [
263
- "ffmpeg",
264
- "-i", video_path,
265
- "-q:a", "0",
266
- "-map", "a",
267
- audio_path
268
- ]
269
-
270
- # Run the command and wait for it to finish (synchronous)
271
- subprocess.run(command, check=True)
272
- print(f"Audio extracted to: {audio_path}")
273
- return audio_path
274
-
275
- except Exception as e:
276
- print(f"Error extracting audio: {e}")
277
- return None
278
 
279
 
280
  def transcribe_audio(audio_path):
281
  """
282
  Transcribe audio using Whisper AI.
283
  """
284
- print("CAME IN THE transcribe audio folder")
285
  try:
286
-
287
  # Transcribe audio using Whisper AI
288
  print("Transcribing audio...")
289
  result = whisper_model.transcribe(audio_path)
@@ -348,4 +313,4 @@ def query_gemini_api(transcription):
348
 
349
 
350
  if __name__ == '__main__':
351
- app.run(debug=True)
 
151
 
152
 
153
  # Above code is without polling and sleep
 
154
  import os
 
155
  import whisper
156
  import requests
157
  from flask import Flask, request, jsonify, render_template
 
159
 
160
  app = Flask(__name__)
161
  print("APP IS RUNNING, ANIKET")
162
+
163
  # Gemini API settings
164
  from dotenv import load_dotenv
165
  # Load the .env file
 
193
  def mbsa():
194
  return render_template("mbsa.html")
195
 
196
+ @app.route('/process-audio', methods=['POST'])
197
+ def process_audio():
198
+ print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
199
  """
200
+ Flask endpoint to process audio:
201
+ 1. Transcribe provided audio file using Whisper AI.
202
  2. Send transcription to Gemini API for recipe information extraction.
203
  3. Return structured data in the response.
204
  """
205
 
206
+ if 'audio' not in request.files:
207
+ return jsonify({"error": "No audio file provided"}), 400
208
 
209
+ audio_file = request.files['audio']
210
+ print("AUDIO FILE NAME: ", audio_file)
211
  try:
212
  print("SAVING THE FILE TEMPO, ANIKET")
213
+ # Step 1: Save audio to a temporary file
214
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
215
+ audio_file.save(temp_audio_file.name)
216
+ print(f"Audio file saved: {temp_audio_file.name}")
217
 
218
+ print("STARTING TRANSCRIPTION, ANIKET")
219
+ # Step 2: Transcribe the audio using Whisper AI
220
+ transcription = transcribe_audio(temp_audio_file.name)
 
 
 
 
 
 
 
 
221
 
222
  print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
223
 
 
227
  print("GOT THE transcription")
228
 
229
  print("Starting the GEMINI REQUEST TO STRUCTURE IT")
230
+ # Step 3: Generate structured recipe information using Gemini API
231
  structured_data = query_gemini_api(transcription)
232
 
233
  print("GOT THE STRUCTURED DATA", structured_data)
234
+ # Step 4: Return the structured data
235
  return jsonify(structured_data)
236
 
237
  except Exception as e:
 
239
 
240
  finally:
241
  # Clean up temporary files
242
+ if os.path.exists(temp_audio_file.name):
243
+ os.remove(temp_audio_file.name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
 
246
  def transcribe_audio(audio_path):
247
  """
248
  Transcribe audio using Whisper AI.
249
  """
250
+ print("CAME IN THE transcribe audio function")
251
  try:
 
252
  # Transcribe audio using Whisper AI
253
  print("Transcribing audio...")
254
  result = whisper_model.transcribe(audio_path)
 
313
 
314
 
315
  if __name__ == '__main__':
316
+ app.run(debug=True)