Spaces:

GoodML
/

dishDecode

Running

App Files Community

GoodML committed on Nov 25, 2024

Commit

d85921f

verified ·

1 Parent(s): bb47241

some threading included

Browse files

Files changed (1) hide show

app.py +39 -74

app.py CHANGED Viewed

@@ -4,16 +4,15 @@ import whisper
 import requests
 from flask import Flask, request, jsonify, send_file
 import tempfile
 app = Flask(__name__)
 # Gemini API settings
-from dotenv import load_dotenv
-import requests
-# Load the .env file
 load_dotenv()
-# Fetch the API key from the .env file
 API_KEY = os.getenv("FIRST_API_KEY")
 # Ensure the API key is loaded correctly
@@ -25,27 +24,45 @@ GEMINI_API_KEY = API_KEY
 # Load Whisper AI model at startup
 print("Loading Whisper AI model...")
-whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
 print("Whisper AI model loaded successfully.")
 # Define the "/" endpoint for health check
 @app.route("/", methods=["GET"])
 def health_check():
     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
 @app.route('/process-video', methods=['POST'])
 def process_video():
-    """
-    Flask endpoint to process video:
-    1. Extract audio and transcribe using Whisper AI.
-    2. Send transcription to Gemini API for recipe information extraction.
-    3. Return structured data in the response.
-    """
     if 'video' not in request.files:
-        return jsonify({"error1": "No video file provided"}), 400
     video_file = request.files['video']
@@ -55,74 +72,28 @@ def process_video():
             video_file.save(temp_video_file.name)
             print(f"Video file saved: {temp_video_file.name}")
-            # Extract audio and transcribe using Whisper AI
-            transcription = transcribe_audio(temp_video_file.name)
-            if not transcription:
-                return jsonify({"error2": "Audio transcription failed"}), 500
-            # Generate structured recipe information using Gemini API
-            structured_data = query_gemini_api(transcription)
-            return jsonify(structured_data)
     except Exception as e:
-        return jsonify({"error3": str(e)}), 500
-    finally:
-        # Clean up temporary files
-        if os.path.exists(temp_video_file.name):
-            os.remove(temp_video_file.name)
-# def transcribe_audio(video_path):
-#     """
-#     Extract audio from video file and transcribe using Whisper AI.
-#     """
-#     try:
-#         # Extract audio using ffmpeg
-#         audio_path = video_path.replace(".mp4", ".wav")
-#         command = [
-#             "ffmpeg",
-#             "-i", video_path,
-#             "-q:a", "0",
-#             "-map", "a",
-#             audio_path
-#         ]
-#         subprocess.run(command, check=True)
-#         print(f"Audio extracted to: {audio_path}")
-#         # Transcribe audio using Whisper AI
-#         print("Transcribing audio...")
-#         result = whisper_model.transcribe(audio_path)
-#         # Clean up audio file after transcription
-#         if os.path.exists(audio_path):
-#             os.remove(audio_path)
-#         return result.get("text", "").strip()
-#     except Exception as e:
-#         print(f"Error in transcription: {e}")
-#         return None
 def transcribe_audio(video_path):
     """
     Transcribe audio directly from a video file using Whisper AI.
     """
     try:
-        # Transcribe audio from video directly using Whisper AI
         print(f"Transcribing video: {video_path}")
         result = whisper_model.transcribe(video_path)
         return result['text']
     except Exception as e:
-        print(f"Error in transcription4: {e}")
         return None
 def query_gemini_api(transcription):
     """
     Send transcription text to Gemini API and fetch structured recipe information.
@@ -143,20 +114,14 @@ def query_gemini_api(transcription):
             f"Text: {transcription}\n"
         )
-        # Prepare the payload and headers
         payload = {
             "contents": [
-                {
-                    "parts": [
-                        {"text": prompt}
-                    ]
-                }
             ]
         }
         headers = {"Content-Type": "application/json"}
         # Send request to Gemini API
-        print("Querying Gemini API...")
         response = requests.post(
             f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
             json=payload,
@@ -170,8 +135,8 @@ def query_gemini_api(transcription):
     except requests.exceptions.RequestException as e:
         print(f"Error querying Gemini API: {e}")
-        return {"error5": str(e)}
 if __name__ == '__main__':
-    app.run(debug=True)

 import requests
 from flask import Flask, request, jsonify, send_file
 import tempfile
+import warnings
+warnings.filterwarnings("ignore", category=UserWarning, module="whisper")
 app = Flask(__name__)
 # Gemini API settings
 load_dotenv()
 API_KEY = os.getenv("FIRST_API_KEY")
 # Ensure the API key is loaded correctly
 # Load Whisper AI model at startup
 print("Loading Whisper AI model...")
+whisper_model = whisper.load_model("base")
 print("Whisper AI model loaded successfully.")
 # Define the "/" endpoint for health check
 @app.route("/", methods=["GET"])
 def health_check():
     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
+def process_video_in_background(video_file, temp_video_file_name):
+    """
+    This function is executed in a separate thread to handle the long-running
+    video processing tasks such as transcription and querying the Gemini API.
+    """
+    try:
+        transcription = transcribe_audio(temp_video_file_name)
+        if not transcription:
+            print("Audio transcription failed")
+            return
+        structured_data = query_gemini_api(transcription)
+        # Send structured data back or store it in a database, depending on your use case
+        print("Processing complete. Structured data:", structured_data)
+    except Exception as e:
+        print(f"Error processing video: {e}")
+    finally:
+        # Clean up temporary files
+        if os.path.exists(temp_video_file_name):
+            os.remove(temp_video_file_name)
 @app.route('/process-video', methods=['POST'])
 def process_video():
     if 'video' not in request.files:
+        return jsonify({"error": "No video file provided"}), 400
     video_file = request.files['video']
             video_file.save(temp_video_file.name)
             print(f"Video file saved: {temp_video_file.name}")
+            # Start the video processing in a background thread
+            threading.Thread(target=process_video_in_background, args=(video_file, temp_video_file.name)).start()
+            return jsonify({"message": "Video is being processed in the background."}), 202
     except Exception as e:
+        return jsonify({"error": str(e)}), 500
 def transcribe_audio(video_path):
     """
     Transcribe audio directly from a video file using Whisper AI.
     """
     try:
         print(f"Transcribing video: {video_path}")
         result = whisper_model.transcribe(video_path)
         return result['text']
     except Exception as e:
+        print(f"Error in transcription: {e}")
         return None
 def query_gemini_api(transcription):
     """
     Send transcription text to Gemini API and fetch structured recipe information.
             f"Text: {transcription}\n"
         )
         payload = {
             "contents": [
+                {"parts": [{"text": prompt}]}
             ]
         }
         headers = {"Content-Type": "application/json"}
         # Send request to Gemini API
         response = requests.post(
             f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
             json=payload,
     except requests.exceptions.RequestException as e:
         print(f"Error querying Gemini API: {e}")
+        return {"error": str(e)}
 if __name__ == '__main__':
+    app.run(debug=True)