GoodML committed on
Commit 22596d6 · verified · 1 Parent(s): 3066087

some changes to handle audio extraction

Files changed (1)
app.py (+84 -76)
app.py CHANGED
@@ -152,23 +152,21 @@
 
  # Above code is without polling and sleep
 
- # Below is the latest code
  import os
+ import subprocess
  import whisper
  import requests
+ from flask import Flask, request, jsonify, render_template
  import tempfile
- import warnings
- import threading
- import time
- from flask import Flask, request, jsonify
- from dotenv import load_dotenv
-
- warnings.filterwarnings("ignore", category=UserWarning, module="whisper")
 
  app = Flask(__name__)
 
  # Gemini API settings
+ from dotenv import load_dotenv
+ # Load the .env file
  load_dotenv()
+
+ # Fetch the API key from the .env file
  API_KEY = os.getenv("FIRST_API_KEY")
 
  # Ensure the API key is loaded correctly
@@ -180,81 +178,100 @@ GEMINI_API_KEY = API_KEY
 
  # Load Whisper AI model at startup
  print("Loading Whisper AI model...")
- whisper_model = whisper.load_model("base")
+ whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
  print("Whisper AI model loaded successfully.")
 
+
  # Define the "/" endpoint for health check
  @app.route("/", methods=["GET"])
  def health_check():
      return jsonify({"status": "success", "message": "API is running successfully!"}), 200
 
-
- def process_video_in_background(video_file, temp_video_file_name, result_container):
-     """
-     This function is executed in a separate thread to handle the long-running
-     video processing tasks such as transcription and querying the Gemini API.
-     """
-     try:
-         transcription = transcribe_audio(temp_video_file_name)
-
-         if not transcription:
-             result_container["error"] = "Audio transcription failed"
-             return
-
-         structured_data = query_gemini_api(transcription)
-
-         # Save structured data to the result container to return later
-         result_container["data"] = structured_data
-
-     except Exception as e:
-         result_container["error"] = f"Error processing video: {e}"
-
-     finally:
-         # Clean up temporary files
-         if os.path.exists(temp_video_file_name):
-             os.remove(temp_video_file_name)
-
+ @app.route("/mbsa")
+ def mbsa():
+     return render_template("mbsa.html")
 
  @app.route('/process-video', methods=['POST'])
  def process_video():
+     """
+     Flask endpoint to process video:
+     1. Extract audio and transcribe using Whisper AI.
+     2. Send transcription to Gemini API for recipe information extraction.
+     3. Return structured data in the response.
+     """
      if 'video' not in request.files:
          return jsonify({"error": "No video file provided"}), 400
 
      video_file = request.files['video']
-     result_container = {}
 
      try:
-         # Save video to a temporary file
+         # Step 1: Save video to a temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
             video_file.save(temp_video_file.name)
             print(f"Video file saved: {temp_video_file.name}")
 
-         # Start the video processing in a background thread
-         threading.Thread(target=process_video_in_background, args=(video_file, temp_video_file.name, result_container)).start()
+         # Step 2: Extract audio from video using ffmpeg (waiting for completion)
+         audio_path = extract_audio(temp_video_file.name)
 
-         # Poll every 5 seconds to check if the result is available
-         while "data" not in result_container and "error" not in result_container:
-             print("Waiting for processing to complete...")
-             time.sleep(5)  # Sleep for 5 seconds before checking again
+         if not audio_path:
+             return jsonify({"error": "Audio extraction failed"}), 500
 
-         # Check for the result
-         if "error" in result_container:
-             return jsonify({"error": result_container["error"]}), 500
-         else:
-             return jsonify({"message": "Processing complete", "data": result_container["data"]}), 200
+         # Step 3: Transcribe the audio using Whisper AI (waiting for completion)
+         transcription = transcribe_audio(audio_path)
+
+         if not transcription:
+             return jsonify({"error": "Audio transcription failed"}), 500
+
+         # Step 4: Generate structured recipe information using Gemini API (waiting for completion)
+         structured_data = query_gemini_api(transcription)
+
+         # Step 5: Return the structured data
+         return jsonify(structured_data)
 
      except Exception as e:
          return jsonify({"error": str(e)}), 500
 
+     finally:
+         # Clean up temporary files
+         if os.path.exists(temp_video_file.name):
+             os.remove(temp_video_file.name)
+
 
- def transcribe_audio(video_path):
+ def extract_audio(video_path):
      """
-     Transcribe audio directly from a video file using Whisper AI.
+     Extract audio from video using ffmpeg and save as WAV file.
      """
      try:
-         print(f"Transcribing video: {video_path}")
-         result = whisper_model.transcribe(video_path)
-         return result['text']
+         # Define the audio output path
+         audio_path = video_path.replace(".mp4", ".wav")
+         command = [
+             "ffmpeg",
+             "-i", video_path,
+             "-q:a", "0",
+             "-map", "a",
+             audio_path
+         ]
+
+         # Run the command and wait for it to finish (synchronous)
+         subprocess.run(command, check=True)
+         print(f"Audio extracted to: {audio_path}")
+         return audio_path
+
+     except Exception as e:
+         print(f"Error extracting audio: {e}")
+         return None
+
+
+ def transcribe_audio(audio_path):
+     """
+     Transcribe audio using Whisper AI.
+     """
+     try:
+         # Transcribe audio using Whisper AI
+         print("Transcribing audio...")
+         result = whisper_model.transcribe(audio_path)
+         return result.get("text", "").strip()
+
      except Exception as e:
          print(f"Error in transcription: {e}")
          return None
@@ -280,41 +297,32 @@ def query_gemini_api(transcription):
          f"Text: {transcription}\n"
      )
 
+     # Prepare the payload and headers
      payload = {
          "contents": [
-             {"parts": [{"text": prompt}]}
+             {
+                 "parts": [
+                     {"text": prompt}
+                 ]
+             }
          ]
      }
      headers = {"Content-Type": "application/json"}
 
-     # Send request to Gemini API
+     # Send request to Gemini API and wait for the response
+     print("Querying Gemini API...")
      response = requests.post(
          f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
          json=payload,
-         headers=headers
+         headers=headers,
+         timeout=60  # 60 seconds timeout for the request
      )
      response.raise_for_status()
 
-     # Polling for response (in case Gemini takes time to process)
-     polling_wait_time = 5  # Time to wait between polling attempts
-     polling_max_retries = 60  # Maximum number of retries
-
-     for attempt in range(polling_max_retries):
-         print(f"Attempt {attempt + 1} to fetch Gemini API response...")
-         response_data = response.json()
-
-         # Check if the response is ready
-         if "candidates" in response_data and len(response_data["candidates"]) > 0:
-             return response_data["candidates"][0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
-
-         time.sleep(polling_wait_time)  # Wait before trying again
-
-     return "Gemini API response not ready after multiple attempts."
+     # Extract and return the structured data
+     data = response.json()
+     return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
 
  except requests.exceptions.RequestException as e:
      print(f"Error querying Gemini API: {e}")
-     return {"error": str(e)}
-
-
- if __name__ == '__main__':
-     app.run(debug=True)
+     return {"error": str(e)}
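For reference, a minimal client-side check of the reworked endpoint might look like the sketch below. The host, port, and sample file name are assumptions (the commit does not pin a deployment address); the only contract taken from the diff is that /process-video expects a multipart field named "video" and returns JSON.

# Hypothetical smoke test for the /process-video endpoint shown in this commit.
# Assumes the Flask app is running locally on port 5000 and that sample.mp4
# exists; neither detail comes from the commit itself.
import requests

with open("sample.mp4", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:5000/process-video",
        files={"video": f},  # field name matches request.files['video'] in app.py
        timeout=600,         # ffmpeg extraction + Whisper + Gemini run synchronously
    )

print(resp.status_code)
print(resp.json())  # structured recipe text on success, {"error": ...} otherwise

Because the endpoint now runs audio extraction, transcription, and the Gemini request to completion before responding, a generous client-side timeout is the main thing to watch for.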