File size: 5,306 Bytes
c37b36e
 
 
 
 
d85921f
35b7e36
1ab9028
 
 
 
 
 
 
d85921f
 
 
c37b36e
 
 
1ab9028
c37b36e
 
 
 
 
 
 
 
 
 
 
 
 
d85921f
c37b36e
 
d5b84f5
 
 
 
 
 
d85921f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5b84f5
c37b36e
 
 
d85921f
c37b36e
 
 
 
 
 
 
 
 
d85921f
 
c37b36e
d85921f
c37b36e
 
d85921f
c37b36e
d842bdb
c37b36e
 
d842bdb
c37b36e
 
d842bdb
 
bb47241
c37b36e
d85921f
c37b36e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d85921f
c37b36e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d85921f
c37b36e
 
 
d85921f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os
import subprocess
import whisper
import requests
import tempfile
import warnings
import threading
from flask import Flask, request, jsonify, send_file, render_template

from dotenv import load_dotenv
import requests




# Silence UserWarning messages whose originating module name matches "whisper".
warnings.filterwarnings("ignore", category=UserWarning, module="whisper")


# Flask application object; the route decorators below register on it.
app = Flask(__name__)


# Gemini API settings.
# load_dotenv() runs before the key is read from the process environment.
load_dotenv()
API_KEY = os.getenv("FIRST_API_KEY")

# Fail at import time if FIRST_API_KEY is missing or empty, instead of
# failing later on the first Gemini request.
if not API_KEY:
    raise ValueError("API Key not found. Make sure it is set in the .env file.")

# generateContent endpoint used by query_gemini_api; the model name is part of the URL.
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
# Alias of API_KEY under the name query_gemini_api reads.
GEMINI_API_KEY = API_KEY

# Load the Whisper "base" model once, at import time; transcribe_audio reuses
# this single module-level instance for every request.
print("Loading Whisper AI model...")
whisper_model = whisper.load_model("base")
print("Whisper AI model loaded successfully.")

# Define the "/" endpoint for health check
@app.route("/", methods=["GET"])
def health_check():
    """Health-check endpoint: reply with a fixed success payload and HTTP 200."""
    body = {"status": "success", "message": "API is running successfully!"}
    return jsonify(body), 200


def process_video_in_background(video_file, temp_video_file_name):
    """
    Transcribe a saved video and ask the Gemini API for structured recipe data.

    This function is the target of the worker thread started by the
    ``/process-video`` handler, so failures are reported with ``print``
    instead of being raised to a caller.

    Args:
        video_file: Not used by this function; kept in the signature so
            existing callers can keep passing it.
        temp_video_file_name: Path of the temporary video file to transcribe.
            The file is removed before this function returns, whether or not
            processing succeeded.

    Returns:
        None. The outcome is only printed.
    """
    try:
        transcription = transcribe_audio(temp_video_file_name)

        if not transcription:
            print("Audio transcription failed")
            return

        structured_data = query_gemini_api(transcription)

        # query_gemini_api signals a failed request by returning a dict with
        # an "error" key; do not report that case as a completed run.
        if isinstance(structured_data, dict) and "error" in structured_data:
            print(f"Gemini API query failed: {structured_data['error']}")
            return

        # Send structured data back or store it in a database, depending on your use case
        print("Processing complete. Structured data:", structured_data)

    except Exception as e:
        # Last-resort boundary of the worker thread: report and fall through
        # to the cleanup below.
        print(f"Error processing video: {e}")

    finally:
        # Clean up the temporary file. Attempt the removal directly rather
        # than checking for existence first, and keep any filesystem error
        # from escaping the thread as an unhandled exception.
        try:
            os.remove(temp_video_file_name)
        except FileNotFoundError:
            pass  # Already gone; nothing to clean up.
        except OSError as e:
            print(f"Could not remove temporary file {temp_video_file_name}: {e}")


@app.route('/process-video', methods=['POST'])
def process_video():
    """
    Accept an uploaded video and start processing it in a background thread.

    Expects a multipart form upload with the file in the ``video`` field.

    Returns:
        A ``(response, status)`` tuple:
        * 400 with an ``error`` message when no ``video`` part was sent or
          the part has an empty filename;
        * 202 with a ``message`` once the upload is saved and the worker
          thread has been started;
        * 500 with the exception text when saving the upload or starting the
          thread fails.
    """
    if 'video' not in request.files:
        return jsonify({"error": "No video file provided"}), 400

    video_file = request.files['video']

    # A form submitted without a selected file still produces a part, but
    # with an empty filename; treat it like a missing upload.
    if not video_file.filename:
        return jsonify({"error": "No video file provided"}), 400

    temp_path = None
    try:
        # Only reserve a unique path here. The handle is closed on leaving the
        # ``with`` block, before anything else opens the file by name:
        # reopening a still-open NamedTemporaryFile is not portable.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
            temp_path = temp_video_file.name

        video_file.save(temp_path)
        print(f"Video file saved: {temp_path}")

        # Start the video processing in a background thread. The worker
        # deletes the temporary file when it finishes.
        threading.Thread(
            target=process_video_in_background,
            args=(video_file, temp_path),
        ).start()

    except Exception as e:
        # The worker never started, so nobody else will delete the file
        # (delete=False above); remove it here on a best-effort basis.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return jsonify({"error": str(e)}), 500

    return jsonify({"message": "Video is being processed in the background."}), 202


def transcribe_audio(video_path):
    """
    Run the module-level Whisper model on a video file and return its text.

    Args:
        video_path: Path of the video file handed to ``whisper_model.transcribe``.

    Returns:
        The ``'text'`` entry of the transcription result, or ``None`` if
        anything raised along the way (the error is printed).
    """
    try:
        print(f"Transcribing video: {video_path}")
        outcome = whisper_model.transcribe(video_path)
        text = outcome['text']
    except Exception as err:
        print(f"Error in transcription: {err}")
        text = None
    return text


def _extract_gemini_text(data):
    """Return the first candidate's first text part, or "No result found"."""
    try:
        # ``or [{}]`` also covers a key that is present but holds an empty
        # list, which would otherwise make ``[0]`` raise IndexError.
        candidates = data.get("candidates") or [{}]
        parts = candidates[0].get("content", {}).get("parts") or [{}]
        return parts[0].get("text", "No result found")
    except (AttributeError, IndexError, KeyError, TypeError):
        # The body was valid JSON but not shaped as expected.
        return "No result found"


def query_gemini_api(transcription, timeout=(10, 120)):
    """
    Send transcription text to Gemini API and fetch structured recipe information.

    Args:
        transcription: Transcribed text, embedded at the end of the prompt.
        timeout: Value passed to ``requests.post`` as ``timeout``; the default
            is a ``(connect, read)`` pair in seconds. Without a timeout the
            request could block indefinitely.

    Returns:
        The text of the first part of the first candidate in the response,
        ``"No result found"`` when the response carries no such text, or a
        dict ``{"error": <message>}`` when the HTTP request fails or the
        response body is not valid JSON.
    """
    # Define the structured prompt
    prompt = (
        "Analyze the provided cooking video transcription and extract the following structured information:\n"
        "1. Recipe Name: Identify the name of the dish being prepared.\n"
        "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
        "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
        "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
        "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
        "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
        "7. Serving size: In count of people or portion size.\n"
        "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
        "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
        f"Text: {transcription}\n"
    )

    payload = {
        "contents": [
            {"parts": [{"text": prompt}]}
        ]
    }
    headers = {
        "Content-Type": "application/json",
        # The key travels in a header instead of a "?key=" query parameter so
        # it is not part of the URL that requests includes in the HTTP error
        # messages printed and returned below.
        "x-goog-api-key": GEMINI_API_KEY,
    }

    try:
        # Send request to Gemini API
        response = requests.post(
            GEMINI_API_ENDPOINT,
            json=payload,
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error querying Gemini API: {e}")
        return {"error": str(e)}

    # Extract and return the structured data
    return _extract_gemini_text(data)


if __name__ == '__main__':
    # Debug mode turns on the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code, so it is opt-in through the
    # FLASK_DEBUG environment variable rather than always enabled.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled)