Spaces:

GoodML
/

dishDecode

Running

App Files Community

GoodML committed on Nov 25, 2024

Commit

c139644

verified ·

1 Parent(s): 3e72eb1

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -58

app.py CHANGED Viewed

@@ -151,9 +151,7 @@
 # Above code is without polling and sleep
 import os
-import subprocess
 import whisper
 import requests
 from flask import Flask, request, jsonify, render_template
@@ -161,6 +159,7 @@ import tempfile
 app = Flask(__name__)
 print("APP IS RUNNING, ANIKET")
 # Gemini API settings
 from dotenv import load_dotenv
 # Load the .env file
@@ -194,39 +193,31 @@ def health_check():
 def mbsa():
     return render_template("mbsa.html")
-@app.route('/process-video', methods=['POST'])
-def process_video():
-    print("GOT THE PROCESS VIDEO REQUEST, ANIKET")
     """
-    Flask endpoint to process video:
-    1. Extract audio and transcribe using Whisper AI.
     2. Send transcription to Gemini API for recipe information extraction.
     3. Return structured data in the response.
     """
-    if 'video' not in request.files:
-        return jsonify({"error": "No video file provided"}), 400
-    video_file = request.files['video']
-    print("VIDEO FILE NAME: ", video_file)
     try:
         print("SAVING THE FILE TEMPO, ANIKET")
-        # Step 1: Save video to a temporary file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
-            video_file.save(temp_video_file.name)
-            print(f"Video file saved: {temp_video_file.name}")
-            # Step 2: Extract audio from video using ffmpeg (waiting for completion)
-            audio_path = extract_audio(temp_video_file.name)
-            print("AUDIO PATH FROM LINE 221, ANIKET", audio_path)
-            if not audio_path:
-                return jsonify({"error": "Audio extraction failed"}), 500
-            print("STARTING TRANSCRIPTION, GOT THE .WAV AUDIO PATH THAT WAS STORED TEMPO, ANIKET")
-            # Step 3: Transcribe the audio using Whisper AI (waiting for completion)
-            transcription = transcribe_audio(audio_path)
             print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
@@ -236,11 +227,11 @@ def process_video():
             print("GOT THE transcription")
             print("Starting the GEMINI REQUEST TO STRUCTURE IT")
-            # Step 4: Generate structured recipe information using Gemini API (waiting for completion)
             structured_data = query_gemini_api(transcription)
             print("GOT THE STRUCTURED DATA", structured_data)
-            # Step 5: Return the structured data
             return jsonify(structured_data)
     except Exception as e:
@@ -248,42 +239,16 @@ def process_video():
     finally:
         # Clean up temporary files
-        if os.path.exists(temp_video_file.name):
-            os.remove(temp_video_file.name)
-def extract_audio(video_path):
-    """
-    Extract audio from video using ffmpeg and save as WAV file.
-    """
-    try:
-        # Define the audio output path
-        audio_path = video_path.replace(".mp4", ".wav")
-        command = [
-            "ffmpeg",
-            "-i", video_path,
-            "-q:a", "0",
-            "-map", "a",
-            audio_path
-        ]
-        # Run the command and wait for it to finish (synchronous)
-        subprocess.run(command, check=True)
-        print(f"Audio extracted to: {audio_path}")
-        return audio_path
-    except Exception as e:
-        print(f"Error extracting audio: {e}")
-        return None
 def transcribe_audio(audio_path):
     """
     Transcribe audio using Whisper AI.
     """
-    print("CAME IN THE transcribe audio folder")
     try:
         # Transcribe audio using Whisper AI
         print("Transcribing audio...")
         result = whisper_model.transcribe(audio_path)
@@ -348,4 +313,4 @@ def query_gemini_api(transcription):
 if __name__ == '__main__':
-    app.run(debug=True)

 # Above code is without polling and sleep
 import os
 import whisper
 import requests
 from flask import Flask, request, jsonify, render_template
 app = Flask(__name__)
 print("APP IS RUNNING, ANIKET")
 # Gemini API settings
 from dotenv import load_dotenv
 # Load the .env file
 def mbsa():
     return render_template("mbsa.html")
+@app.route('/process-audio', methods=['POST'])
+def process_audio():
+    print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
     """
+    Flask endpoint to process audio:
+    1. Transcribe provided audio file using Whisper AI.
     2. Send transcription to Gemini API for recipe information extraction.
     3. Return structured data in the response.
     """
+    if 'audio' not in request.files:
+        return jsonify({"error": "No audio file provided"}), 400
+    audio_file = request.files['audio']
+    print("AUDIO FILE NAME: ", audio_file)
     try:
         print("SAVING THE FILE TEMPO, ANIKET")
+        # Step 1: Save audio to a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
+            audio_file.save(temp_audio_file.name)
+            print(f"Audio file saved: {temp_audio_file.name}")
+            print("STARTING TRANSCRIPTION, ANIKET")
+            # Step 2: Transcribe the audio using Whisper AI
+            transcription = transcribe_audio(temp_audio_file.name)
             print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
             print("GOT THE transcription")
             print("Starting the GEMINI REQUEST TO STRUCTURE IT")
+            # Step 3: Generate structured recipe information using Gemini API
             structured_data = query_gemini_api(transcription)
             print("GOT THE STRUCTURED DATA", structured_data)
+            # Step 4: Return the structured data
             return jsonify(structured_data)
     except Exception as e:
     finally:
         # Clean up temporary files
+        if os.path.exists(temp_audio_file.name):
+            os.remove(temp_audio_file.name)
 def transcribe_audio(audio_path):
     """
     Transcribe audio using Whisper AI.
     """
+    print("CAME IN THE transcribe audio function")
     try:
         # Transcribe audio using Whisper AI
         print("Transcribing audio...")
         result = whisper_model.transcribe(audio_path)
 if __name__ == '__main__':
+    app.run(debug=True)