# Source: Hugging Face Space "GoodML/dishDecode" (status: Running; file size: 5,306 bytes).

import os
import subprocess
import whisper
import requests
import tempfile
import warnings
import threading
from flask import Flask, request, jsonify, send_file, render_template

from dotenv import load_dotenv
import requests




# Suppress UserWarning messages attributed to the "whisper" module so they do
# not clutter the console output.
warnings.filterwarnings("ignore", category=UserWarning, module="whisper")


# Flask application object; the @app.route decorators below register on it.
app = Flask(__name__)


# Gemini API settings
# load_dotenv() runs before os.getenv() so that FIRST_API_KEY can be supplied
# through a .env file (presumably python-dotenv's default lookup - confirm).
load_dotenv()
API_KEY = os.getenv("FIRST_API_KEY")

# Ensure the API key is loaded correctly
# This check runs at import time: the module refuses to load when the
# variable is unset or empty, before the Whisper model is loaded.
if not API_KEY:
    raise ValueError("API Key not found. Make sure it is set in the .env file.")

# Endpoint and credential used by query_gemini_api(); GEMINI_API_KEY is an
# alias of API_KEY.
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
GEMINI_API_KEY = API_KEY

# Load Whisper AI model at startup
# The "base" model is loaded once here and shared by every call to
# transcribe_audio() through the module-level name whisper_model.
print("Loading Whisper AI model...")
whisper_model = whisper.load_model("base")
print("Whisper AI model loaded successfully.")

# Define the "/" endpoint for health check
@app.route("/", methods=["GET"])
def health_check():
    """Report that the service is up: a fixed JSON success payload with HTTP 200."""
    status_payload = {
        "status": "success",
        "message": "API is running successfully!",
    }
    return jsonify(status_payload), 200


def process_video_in_background(video_file, temp_video_file_name):
    """
    Transcribe a saved video and extract recipe data from it, off the request thread.

    Meant to be used as a ``threading.Thread`` target: it returns nothing and
    lets no exception escape. Progress and failures are reported with
    ``print``.

    Args:
        video_file: The uploaded file object handed over by the caller. It is
            not read here; all processing works from the copy on disk.
        temp_video_file_name: Path of the temporary video file to process.
            The file is removed before this function returns, whether or not
            processing succeeded.
    """
    try:
        transcription = transcribe_audio(temp_video_file_name)

        # Covers both a failed transcription (None) and an empty transcript.
        if not transcription:
            print("Audio transcription failed")
            return

        structured_data = query_gemini_api(transcription)

        # query_gemini_api reports a failed request as {"error": "..."} rather
        # than raising, so that case must not be announced as a success.
        if isinstance(structured_data, dict) and "error" in structured_data:
            print(f"Gemini API query failed: {structured_data['error']}")
            return

        # Send structured data back or store it in a database, depending on your use case
        print("Processing complete. Structured data:", structured_data)

    except Exception as e:
        # Top-level boundary of the worker thread: report and swallow so the
        # thread ends quietly instead of dumping a traceback.
        print(f"Error processing video: {e}")

    finally:
        # Clean up the temporary file. Removing directly (instead of checking
        # for existence first) avoids a race with anything else deleting it,
        # and a failed removal is reported rather than raised from the thread.
        try:
            os.remove(temp_video_file_name)
        except FileNotFoundError:
            pass
        except OSError as e:
            print(f"Could not remove temporary file {temp_video_file_name}: {e}")


@app.route('/process-video', methods=['POST'])
def process_video():
    """
    Accept an uploaded video and start processing it in a background thread.

    Expects a multipart form upload with the file in the ``video`` field.

    Returns:
        A ``(json, status)`` pair:
        * 400 when no ``video`` file was supplied (field missing, or submitted
          with an empty filename because no file was selected);
        * 202 once the upload has been saved and the worker thread started;
        * 500 with the error text when saving or starting the worker fails.
    """
    if 'video' not in request.files:
        return jsonify({"error": "No video file provided"}), 400

    video_file = request.files['video']

    # Browsers send an empty part with no filename when the form is submitted
    # without a file selected.
    if not video_file.filename:
        return jsonify({"error": "No video file provided"}), 400

    temp_path = None
    try:
        # Reserve a unique path and close our handle right away: writing to
        # the path while the handle is still open fails on some platforms,
        # and the worker thread must not race with an open handle either.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
            temp_path = temp_video_file.name

        video_file.save(temp_path)
        print(f"Video file saved: {temp_path}")

        # Start the video processing in a background thread. From here on the
        # worker owns the temporary file and deletes it when it finishes.
        threading.Thread(
            target=process_video_in_background,
            args=(video_file, temp_path),
        ).start()

    except Exception as e:
        # The worker never started, so nobody else will delete the file.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return jsonify({"error": str(e)}), 500

    return jsonify({"message": "Video is being processed in the background."}), 202


def transcribe_audio(video_path):
    """
    Run the shared Whisper model on the file at ``video_path``.

    Returns the transcript text, or None when anything goes wrong (the error
    is printed, never raised).
    """
    transcript = None
    try:
        print(f"Transcribing video: {video_path}")
        whisper_output = whisper_model.transcribe(video_path)
        transcript = whisper_output['text']
    except Exception as err:
        print(f"Error in transcription: {err}")
        transcript = None
    return transcript


def _extract_gemini_text(data):
    """Return the first candidate's text from a Gemini response, or "No result found"."""
    try:
        first_part = data["candidates"][0]["content"]["parts"][0]
    except (KeyError, IndexError, TypeError):
        # Missing keys, empty "candidates"/"parts" lists, or an unexpected
        # shape: there is no text to return.
        return "No result found"
    if not isinstance(first_part, dict):
        return "No result found"
    return first_part.get("text", "No result found")


def query_gemini_api(transcription):
    """
    Send transcription text to Gemini API and fetch structured recipe information.

    Args:
        transcription: Transcript text of the cooking video; it is embedded
            verbatim at the end of the prompt.

    Returns:
        The text of the first candidate in the response, the string
        "No result found" when the response carries no such text, or a dict
        ``{"error": "<message>"}`` when the HTTP request fails or the response
        body is not valid JSON.
    """
    # Define the structured prompt
    prompt = (
        "Analyze the provided cooking video transcription and extract the following structured information:\n"
        "1. Recipe Name: Identify the name of the dish being prepared.\n"
        "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
        "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
        "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
        "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
        "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
        "7. Serving size: In count of people or portion size.\n"
        "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
        "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
        f"Text: {transcription}\n"
    )

    payload = {
        "contents": [
            {"parts": [{"text": prompt}]}
        ]
    }
    # The key travels in a header rather than the URL query string: request
    # errors include the URL in their message, and that message is both
    # printed and returned below, which would otherwise expose the key.
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": GEMINI_API_KEY,
    }

    try:
        # Send request to Gemini API. Without a timeout a stalled connection
        # would block the worker thread forever: (connect, read) in seconds.
        response = requests.post(
            GEMINI_API_ENDPOINT,
            json=payload,
            headers=headers,
            timeout=(10, 120),
        )
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error querying Gemini API: {e}")
        return {"error": str(e)}
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body.
        print(f"Error querying Gemini API: {e}")
        return {"error": str(e)}

    # Extract and return the structured data
    return _extract_gemini_text(data)


if __name__ == '__main__':
    # Debug mode turns on the interactive debugger (which can execute
    # arbitrary code from the browser) and the reloader (which imports this
    # module, and therefore loads the Whisper model, a second time). Keep it
    # opt-in: set FLASK_DEBUG=1 for local development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)