GoodML committed on
Commit 22596d6 · verified · 1 Parent(s): 3066087

some changes to handle audio extraction

Files changed (1)
app.py (+84 -76)
app.py CHANGED
@@ -152,23 +152,21 @@
 
  # Above code is without polling and sleep
 
- # Below is the latest code
  import os
+ import subprocess
  import whisper
  import requests
+ from flask import Flask, request, jsonify, render_template
  import tempfile
- import warnings
- import threading
- import time
- from flask import Flask, request, jsonify
- from dotenv import load_dotenv
-
- warnings.filterwarnings("ignore", category=UserWarning, module="whisper")
 
  app = Flask(__name__)
 
  # Gemini API settings
+ from dotenv import load_dotenv
+ # Load the .env file
  load_dotenv()
+
+ # Fetch the API key from the .env file
  API_KEY = os.getenv("FIRST_API_KEY")
 
  # Ensure the API key is loaded correctly
@@ -180,81 +178,100 @@ GEMINI_API_KEY = API_KEY
 
  # Load Whisper AI model at startup
  print("Loading Whisper AI model...")
- whisper_model = whisper.load_model("base")
+ whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
  print("Whisper AI model loaded successfully.")
 
+
  # Define the "/" endpoint for health check
  @app.route("/", methods=["GET"])
  def health_check():
      return jsonify({"status": "success", "message": "API is running successfully!"}), 200
 
-
- def process_video_in_background(video_file, temp_video_file_name, result_container):
-     """
-     This function is executed in a separate thread to handle the long-running
-     video processing tasks such as transcription and querying the Gemini API.
-     """
-     try:
-         transcription = transcribe_audio(temp_video_file_name)
-
-         if not transcription:
-             result_container["error"] = "Audio transcription failed"
-             return
-
-         structured_data = query_gemini_api(transcription)
-
-         # Save structured data to the result container to return later
-         result_container["data"] = structured_data
-
-     except Exception as e:
-         result_container["error"] = f"Error processing video: {e}"
-
-     finally:
-         # Clean up temporary files
-         if os.path.exists(temp_video_file_name):
-             os.remove(temp_video_file_name)
-
+ @app.route("/mbsa")
+ def mbsa():
+     return render_template("mbsa.html")
 
  @app.route('/process-video', methods=['POST'])
  def process_video():
+     """
+     Flask endpoint to process video:
+     1. Extract audio and transcribe using Whisper AI.
+     2. Send transcription to Gemini API for recipe information extraction.
+     3. Return structured data in the response.
+     """
      if 'video' not in request.files:
          return jsonify({"error": "No video file provided"}), 400
 
      video_file = request.files['video']
-     result_container = {}
 
      try:
-         # Save video to a temporary file
+         # Step 1: Save video to a temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
             video_file.save(temp_video_file.name)
             print(f"Video file saved: {temp_video_file.name}")
 
-         # Start the video processing in a background thread
-         threading.Thread(target=process_video_in_background, args=(video_file, temp_video_file.name, result_container)).start()
+         # Step 2: Extract audio from video using ffmpeg (waiting for completion)
+         audio_path = extract_audio(temp_video_file.name)
 
-         # Poll every 5 seconds to check if the result is available
-         while "data" not in result_container and "error" not in result_container:
-             print("Waiting for processing to complete...")
-             time.sleep(5)  # Sleep for 5 seconds before checking again
+         if not audio_path:
+             return jsonify({"error": "Audio extraction failed"}), 500
 
-         # Check for the result
-         if "error" in result_container:
-             return jsonify({"error": result_container["error"]}), 500
-         else:
-             return jsonify({"message": "Processing complete", "data": result_container["data"]}), 200
+         # Step 3: Transcribe the audio using Whisper AI (waiting for completion)
+         transcription = transcribe_audio(audio_path)
+
+         if not transcription:
+             return jsonify({"error": "Audio transcription failed"}), 500
+
+         # Step 4: Generate structured recipe information using Gemini API (waiting for completion)
+         structured_data = query_gemini_api(transcription)
+
+         # Step 5: Return the structured data
+         return jsonify(structured_data)
 
      except Exception as e:
          return jsonify({"error": str(e)}), 500
 
+     finally:
+         # Clean up temporary files
+         if os.path.exists(temp_video_file.name):
+             os.remove(temp_video_file.name)
+
 
- def transcribe_audio(video_path):
+ def extract_audio(video_path):
      """
-     Transcribe audio directly from a video file using Whisper AI.
+     Extract audio from video using ffmpeg and save as WAV file.
      """
      try:
-         print(f"Transcribing video: {video_path}")
-         result = whisper_model.transcribe(video_path)
-         return result['text']
+         # Define the audio output path
+         audio_path = video_path.replace(".mp4", ".wav")
+         command = [
+             "ffmpeg",
+             "-i", video_path,
+             "-q:a", "0",
+             "-map", "a",
+             audio_path
+         ]
+
+         # Run the command and wait for it to finish (synchronous)
+         subprocess.run(command, check=True)
+         print(f"Audio extracted to: {audio_path}")
+         return audio_path
+
+     except Exception as e:
+         print(f"Error extracting audio: {e}")
+         return None
+
+
+ def transcribe_audio(audio_path):
+     """
+     Transcribe audio using Whisper AI.
+     """
+     try:
+         # Transcribe audio using Whisper AI
+         print("Transcribing audio...")
+         result = whisper_model.transcribe(audio_path)
+         return result.get("text", "").strip()
+
      except Exception as e:
          print(f"Error in transcription: {e}")
          return None
@@ -280,41 +297,32 @@ def query_gemini_api(transcription):
          f"Text: {transcription}\n"
      )
 
+     # Prepare the payload and headers
      payload = {
          "contents": [
-             {"parts": [{"text": prompt}]}
+             {
+                 "parts": [
+                     {"text": prompt}
+                 ]
+             }
          ]
      }
      headers = {"Content-Type": "application/json"}
 
-     # Send request to Gemini API
+     # Send request to Gemini API and wait for the response
+     print("Querying Gemini API...")
      response = requests.post(
          f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
          json=payload,
-         headers=headers
+         headers=headers,
+         timeout=60  # 60 seconds timeout for the request
      )
      response.raise_for_status()
 
-     # Polling for response (in case Gemini takes time to process)
-     polling_wait_time = 5  # Time to wait between polling attempts
-     polling_max_retries = 60  # Maximum number of retries
-
-     for attempt in range(polling_max_retries):
-         print(f"Attempt {attempt + 1} to fetch Gemini API response...")
-         response_data = response.json()
-
-         # Check if the response is ready
-         if "candidates" in response_data and len(response_data["candidates"]) > 0:
-             return response_data["candidates"][0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
-
-         time.sleep(polling_wait_time)  # Wait before trying again
-
-     return "Gemini API response not ready after multiple attempts."
+     # Extract and return the structured data
+     data = response.json()
+     return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
 
  except requests.exceptions.RequestException as e:
      print(f"Error querying Gemini API: {e}")
-     return {"error": str(e)}
-
-
- if __name__ == '__main__':
-     app.run(debug=True)
+     return {"error": str(e)}
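For reference, a minimal client-side check of the reworked endpoint might look like the sketch below. The host, port, and sample file name are assumptions (the commit does not pin a deployment address); the only contract taken from the diff is that /process-video expects a multipart field named "video" and returns JSON.

# Hypothetical smoke test for the /process-video endpoint shown in this commit.
# Assumes the Flask app is running locally on port 5000 and that sample.mp4
# exists; neither detail comes from the commit itself.
import requests

with open("sample.mp4", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:5000/process-video",
        files={"video": f},  # field name matches request.files['video'] in app.py
        timeout=600,         # ffmpeg extraction + Whisper + Gemini run synchronously
    )

print(resp.status_code)
print(resp.json())  # structured recipe text on success, {"error": ...} otherwise

Because the endpoint now runs audio extraction, transcription, and the Gemini request to completion before responding, a generous client-side timeout is the main thing to watch for.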