Spaces:

GoodML
/

dishDecode

Running

App Files Community

GoodML committed on Nov 25, 2024

Commit

20b2044

verified ·

1 Parent(s): abd67f5

Update app.py

Browse files

Files changed (1) hide show

app.py +209 -209

app.py CHANGED Viewed

@@ -1,175 +1,16 @@
-# import os
-# import whisper
-# import requests
-# import asyncio
-# import aiohttp  # For making async HTTP requests
-# from quart import Quart, request, jsonify, render_template
-# from dotenv import load_dotenv
-# import warnings
-# warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
-# app = Quart(__name__)
-# print("APP IS RUNNING, ANIKET")
-# # Load the .env file
-# load_dotenv()
-# print("ENV LOADED, ANIKET")
-# # Fetch the API key from the .env file
-# API_KEY = os.getenv("FIRST_API_KEY")
-# # Ensure the API key is loaded correctly
-# if not API_KEY:
-#     raise ValueError("API Key not found. Make sure it is set in the .env file.")
-# GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
-# GEMINI_API_KEY = API_KEY
-# # Load Whisper AI model at startup
-# print("Loading Whisper AI model..., ANIKET")
-# whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
-# print("Whisper AI model loaded successfully, ANIKET")
-# @app.route("/", methods=["GET"])
-# async def health_check():
-#     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
-# @app.route("/mbsa")
-# async def mbsa():
-#     return await render_template("mbsa.html")
-# @app.route('/process-audio', methods=['POST'])
-# async def process_audio():
-#     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
-#     if 'audio' not in request.files:
-#         return jsonify({"error": "No audio file provided"}), 400
-#     audio_file = request.files['audio']
-#     print("AUDIO FILE NAME: ", audio_file)
-#     try:
-#         print("STARTING TRANSCRIPTION, ANIKET")
-#         # Step 1: Transcribe the uploaded audio file asynchronously
-#         transcription = await transcribe_audio(audio_file)
-#         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
-#         if not transcription:
-#             return jsonify({"error": "Audio transcription failed"}), 500
-#         print("GOT THE transcription")
-#         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
-#         # Step 2: Generate structured recipe information using Gemini API asynchronously
-#         structured_data = await query_gemini_api(transcription)
-#         print("GOT THE STRUCTURED DATA", structured_data)
-#         # Step 3: Return the structured data
-#         return jsonify(structured_data)
-#     except Exception as e:
-#         return jsonify({"error": str(e)}), 500
-# async def transcribe_audio(audio_file):
-#     """
-#     Transcribe audio using Whisper AI (async function).
-#     """
-#     print("CAME IN THE transcribe audio function")
-#     try:
-#         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
-#             audio_file.save(temp_audio_file.name)
-#             print(f"Temporary audio file saved: {temp_audio_file.name}")
-#             # Run Whisper transcription asynchronously
-#             loop = asyncio.get_event_loop()
-#             result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
-#             print("THE RESULTS ARE", result)
-#         return result.get("text", "").strip()
-#     except Exception as e:
-#         print(f"Error in transcription: {e}")
-#         return None
-# async def query_gemini_api(transcription):
-#     """
-#     Send transcription text to Gemini API and fetch structured recipe information (async function).
-#     """
-#     try:
-#         # Define the structured prompt
-#         prompt = (
-#             "Analyze the provided cooking video transcription and extract the following structured information:\n"
-#             "1. Recipe Name: Identify the name of the dish being prepared.\n"
-#             "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
-#             "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
-#             "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
-#             "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
-#             "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
-#             "7. Serving size: In count of people or portion size.\n"
-#             "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
-#             "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
-#             f"Text: {transcription}\n"
-#         )
-#         # Prepare the payload and headers
-#         payload = {
-#             "contents": [
-#                 {
-#                     "parts": [
-#                         {"text": prompt}
-#                     ]
-#                 }
-#             ]
-#         }
-#         headers = {"Content-Type": "application/json"}
-#         # Send request to Gemini API asynchronously
-#         async with aiohttp.ClientSession() as session:
-#             async with session.post(
-#                 f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
-#                 json=payload,
-#                 headers=headers,
-#                 timeout=60  # 60 seconds timeout for the request
-#             ) as response:
-#                 response.raise_for_status()  # Raise error if response code is not 200
-#                 data = await response.json()
-#         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
-#     except aiohttp.ClientError as e:
-#         print(f"Error querying Gemini API: {e}")
-#         return {"error": str(e)}
-# if __name__ == '__main__':
-#     app.run(debug=True)
-# Above code is without polling and sleep
 import os
 import whisper
 import requests
-from flask import Flask, request, jsonify, render_template
-import tempfile
 import warnings
 warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
-app = Flask(__name__)
 print("APP IS RUNNING, ANIKET")
-# Gemini API settings
-from dotenv import load_dotenv
 # Load the .env file
 load_dotenv()
@@ -185,73 +26,72 @@ if not API_KEY:
 GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
 GEMINI_API_KEY = API_KEY
 # Load Whisper AI model at startup
 print("Loading Whisper AI model..., ANIKET")
 whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
 print("Whisper AI model loaded successfully, ANIKET")
-# Define the "/" endpoint for health check
 @app.route("/", methods=["GET"])
-def health_check():
     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
 @app.route("/mbsa")
-def mbsa():
-    return render_template("mbsa.html")
 @app.route('/process-audio', methods=['POST'])
-def process_audio():
     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
-    """
-    Flask endpoint to process audio:
-    1. Transcribe provided audio file using Whisper AI.
-    2. Send transcription to Gemini API for recipe information extraction.
-    3. Return structured data in the response.
-    """
     if 'audio' not in request.files:
         return jsonify({"error": "No audio file provided"}), 400
     audio_file = request.files['audio']
     print("AUDIO FILE NAME: ", audio_file)
     try:
         print("STARTING TRANSCRIPTION, ANIKET")
-        # Step 1: Transcribe the uploaded audio file directly
-        audio_file = request.files['audio']
-        transcription = transcribe_audio(audio_file)
         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
         if not transcription:
             return jsonify({"error": "Audio transcription failed"}), 500
         print("GOT THE transcription")
         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
-        # Step 2: Generate structured recipe information using Gemini API
-        structured_data = query_gemini_api(transcription)
         print("GOT THE STRUCTURED DATA", structured_data)
         # Step 3: Return the structured data
         return jsonify(structured_data)
     except Exception as e:
         return jsonify({"error": str(e)}), 500
-def transcribe_audio(audio_path):
     """
-    Transcribe audio using Whisper AI.
     """
     print("CAME IN THE transcribe audio function")
     try:
-        # Transcribe audio using Whisper AI
-        print("Transcribing audio...")
-        result = whisper_model.transcribe(audio_path)
-        print("THE RESULTS ARE", result)
         return result.get("text", "").strip()
     except Exception as e:
@@ -259,9 +99,9 @@ def transcribe_audio(audio_path):
         return None
-def query_gemini_api(transcription):
     """
-    Send transcription text to Gemini API and fetch structured recipe information.
     """
     try:
         # Define the structured prompt
@@ -291,21 +131,20 @@ def query_gemini_api(transcription):
         }
         headers = {"Content-Type": "application/json"}
-        # Send request to Gemini API and wait for the response
-        print("Querying Gemini API...")
-        response = requests.post(
-            f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
-            json=payload,
-            headers=headers,
-            timeout=60  # 60 seconds timeout for the request
-        )
-        response.raise_for_status()
-        # Extract and return the structured data
-        data = response.json()
         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
-    except requests.exceptions.RequestException as e:
         print(f"Error querying Gemini API: {e}")
         return {"error": str(e)}
@@ -317,6 +156,167 @@ if __name__ == '__main__':
 # import os

 import os
 import whisper
 import requests
+import asyncio
+import aiohttp  # For making async HTTP requests
+from quart import Quart, request, jsonify, render_template
+from dotenv import load_dotenv
 import warnings
 warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
+app = Quart(__name__)
 print("APP IS RUNNING, ANIKET")
 # Load the .env file
 load_dotenv()
 GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
 GEMINI_API_KEY = API_KEY
 # Load Whisper AI model at startup
 print("Loading Whisper AI model..., ANIKET")
 whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
 print("Whisper AI model loaded successfully, ANIKET")
 @app.route("/", methods=["GET"])
+async def health_check():
     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
 @app.route("/mbsa")
+async def mbsa():
+    return await render_template("mbsa.html")
 @app.route('/process-audio', methods=['POST'])
+async def process_audio():
     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
     if 'audio' not in request.files:
         return jsonify({"error": "No audio file provided"}), 400
     audio_file = request.files['audio']
     print("AUDIO FILE NAME: ", audio_file)
     try:
         print("STARTING TRANSCRIPTION, ANIKET")
+        # Step 1: Transcribe the uploaded audio file asynchronously
+        transcription = await transcribe_audio(audio_file)
         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
         if not transcription:
             return jsonify({"error": "Audio transcription failed"}), 500
         print("GOT THE transcription")
         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
+        # Step 2: Generate structured recipe information using Gemini API asynchronously
+        structured_data = await query_gemini_api(transcription)
         print("GOT THE STRUCTURED DATA", structured_data)
         # Step 3: Return the structured data
         return jsonify(structured_data)
     except Exception as e:
         return jsonify({"error": str(e)}), 500
+async def transcribe_audio(audio_file):
     """
+    Transcribe audio using Whisper AI (async function).
     """
     print("CAME IN THE transcribe audio function")
     try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
+            audio_file.save(temp_audio_file.name)
+            print(f"Temporary audio file saved: {temp_audio_file.name}")
+            # Run Whisper transcription asynchronously
+            loop = asyncio.get_event_loop()
+            result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
+            print("THE RESULTS ARE", result)
         return result.get("text", "").strip()
     except Exception as e:
         return None
+async def query_gemini_api(transcription):
     """
+    Send transcription text to Gemini API and fetch structured recipe information (async function).
     """
     try:
         # Define the structured prompt
         }
         headers = {"Content-Type": "application/json"}
+        # Send request to Gemini API asynchronously
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
+                json=payload,
+                headers=headers,
+                timeout=60  # 60 seconds timeout for the request
+            ) as response:
+                response.raise_for_status()  # Raise error if response code is not 200
+                data = await response.json()
         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
+    except aiohttp.ClientError as e:
         print(f"Error querying Gemini API: {e}")
         return {"error": str(e)}
+# # Above code is without polling and sleep
+# import os
+# import whisper
+# import requests
+# from flask import Flask, request, jsonify, render_template
+# import tempfile
+# import warnings
+# warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
+# app = Flask(__name__)
+# print("APP IS RUNNING, ANIKET")
+# # Gemini API settings
+# from dotenv import load_dotenv
+# # Load the .env file
+# load_dotenv()
+# print("ENV LOADED, ANIKET")
+# # Fetch the API key from the .env file
+# API_KEY = os.getenv("FIRST_API_KEY")
+# # Ensure the API key is loaded correctly
+# if not API_KEY:
+#     raise ValueError("API Key not found. Make sure it is set in the .env file.")
+# GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
+# GEMINI_API_KEY = API_KEY
+# # Load Whisper AI model at startup
+# print("Loading Whisper AI model..., ANIKET")
+# whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
+# print("Whisper AI model loaded successfully, ANIKET")
+# # Define the "/" endpoint for health check
+# @app.route("/", methods=["GET"])
+# def health_check():
+#     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
+# @app.route("/mbsa")
+# def mbsa():
+#     return render_template("mbsa.html")
+# @app.route('/process-audio', methods=['POST'])
+# def process_audio():
+#     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
+#     """
+#     Flask endpoint to process audio:
+#     1. Transcribe provided audio file using Whisper AI.
+#     2. Send transcription to Gemini API for recipe information extraction.
+#     3. Return structured data in the response.
+#     """
+#     if 'audio' not in request.files:
+#         return jsonify({"error": "No audio file provided"}), 400
+#     audio_file = request.files['audio']
+#     print("AUDIO FILE NAME: ", audio_file)
+#     try:
+#         print("STARTING TRANSCRIPTION, ANIKET")
+#         # Step 1: Transcribe the uploaded audio file directly
+#         audio_file = request.files['audio']
+#         transcription = transcribe_audio(audio_file)
+#         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
+#         if not transcription:
+#             return jsonify({"error": "Audio transcription failed"}), 500
+#         print("GOT THE transcription")
+#         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
+#         # Step 2: Generate structured recipe information using Gemini API
+#         structured_data = query_gemini_api(transcription)
+#         print("GOT THE STRUCTURED DATA", structured_data)
+#         # Step 3: Return the structured data
+#         return jsonify(structured_data)
+#     except Exception as e:
+#         return jsonify({"error": str(e)}), 500
+# def transcribe_audio(audio_path):
+#     """
+#     Transcribe audio using Whisper AI.
+#     """
+#     print("CAME IN THE transcribe audio function")
+#     try:
+#         # Transcribe audio using Whisper AI
+#         print("Transcribing audio...")
+#         result = whisper_model.transcribe(audio_path)
+#         print("THE RESULTS ARE", result)
+#         return result.get("text", "").strip()
+#     except Exception as e:
+#         print(f"Error in transcription: {e}")
+#         return None
+# def query_gemini_api(transcription):
+#     """
+#     Send transcription text to Gemini API and fetch structured recipe information.
+#     """
+#     try:
+#         # Define the structured prompt
+#         prompt = (
+#             "Analyze the provided cooking video transcription and extract the following structured information:\n"
+#             "1. Recipe Name: Identify the name of the dish being prepared.\n"
+#             "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
+#             "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
+#             "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
+#             "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
+#             "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
+#             "7. Serving size: In count of people or portion size.\n"
+#             "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
+#             "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
+#             f"Text: {transcription}\n"
+#         )
+#         # Prepare the payload and headers
+#         payload = {
+#             "contents": [
+#                 {
+#                     "parts": [
+#                         {"text": prompt}
+#                     ]
+#                 }
+#             ]
+#         }
+#         headers = {"Content-Type": "application/json"}
+#         # Send request to Gemini API and wait for the response
+#         print("Querying Gemini API...")
+#         response = requests.post(
+#             f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
+#             json=payload,
+#             headers=headers,
+#             timeout=60  # 60 seconds timeout for the request
+#         )
+#         response.raise_for_status()
+#         # Extract and return the structured data
+#         data = response.json()
+#         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
+#     except requests.exceptions.RequestException as e:
+#         print(f"Error querying Gemini API: {e}")
+#         return {"error": str(e)}
+# if __name__ == '__main__':
+#     app.run(debug=True)
 # import os