GoodML committed on
Commit
20b2044
·
verified ·
1 Parent(s): abd67f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +209 -209
app.py CHANGED
@@ -1,175 +1,16 @@
1
- # import os
2
- # import whisper
3
- # import requests
4
- # import asyncio
5
- # import aiohttp # For making async HTTP requests
6
- # from quart import Quart, request, jsonify, render_template
7
- # from dotenv import load_dotenv
8
- # import warnings
9
- # warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
10
-
11
- # app = Quart(__name__)
12
- # print("APP IS RUNNING, ANIKET")
13
-
14
- # # Load the .env file
15
- # load_dotenv()
16
-
17
- # print("ENV LOADED, ANIKET")
18
-
19
- # # Fetch the API key from the .env file
20
- # API_KEY = os.getenv("FIRST_API_KEY")
21
-
22
- # # Ensure the API key is loaded correctly
23
- # if not API_KEY:
24
- # raise ValueError("API Key not found. Make sure it is set in the .env file.")
25
-
26
- # GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
27
- # GEMINI_API_KEY = API_KEY
28
-
29
- # # Load Whisper AI model at startup
30
- # print("Loading Whisper AI model..., ANIKET")
31
- # whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
32
- # print("Whisper AI model loaded successfully, ANIKET")
33
-
34
-
35
- # @app.route("/", methods=["GET"])
36
- # async def health_check():
37
- # return jsonify({"status": "success", "message": "API is running successfully!"}), 200
38
-
39
-
40
- # @app.route("/mbsa")
41
- # async def mbsa():
42
- # return await render_template("mbsa.html")
43
-
44
-
45
- # @app.route('/process-audio', methods=['POST'])
46
- # async def process_audio():
47
- # print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
48
-
49
- # if 'audio' not in request.files:
50
- # return jsonify({"error": "No audio file provided"}), 400
51
-
52
- # audio_file = request.files['audio']
53
- # print("AUDIO FILE NAME: ", audio_file)
54
-
55
- # try:
56
- # print("STARTING TRANSCRIPTION, ANIKET")
57
-
58
- # # Step 1: Transcribe the uploaded audio file asynchronously
59
- # transcription = await transcribe_audio(audio_file)
60
-
61
- # print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
62
-
63
- # if not transcription:
64
- # return jsonify({"error": "Audio transcription failed"}), 500
65
-
66
- # print("GOT THE transcription")
67
-
68
- # print("Starting the GEMINI REQUEST TO STRUCTURE IT")
69
- # # Step 2: Generate structured recipe information using Gemini API asynchronously
70
- # structured_data = await query_gemini_api(transcription)
71
-
72
- # print("GOT THE STRUCTURED DATA", structured_data)
73
- # # Step 3: Return the structured data
74
- # return jsonify(structured_data)
75
-
76
- # except Exception as e:
77
- # return jsonify({"error": str(e)}), 500
78
-
79
-
80
- # async def transcribe_audio(audio_file):
81
- # """
82
- # Transcribe audio using Whisper AI (async function).
83
- # """
84
- # print("CAME IN THE transcribe audio function")
85
- # try:
86
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
87
- # audio_file.save(temp_audio_file.name)
88
- # print(f"Temporary audio file saved: {temp_audio_file.name}")
89
-
90
- # # Run Whisper transcription asynchronously
91
- # loop = asyncio.get_event_loop()
92
- # result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
93
- # print("THE RESULTS ARE", result)
94
-
95
- # return result.get("text", "").strip()
96
-
97
- # except Exception as e:
98
- # print(f"Error in transcription: {e}")
99
- # return None
100
-
101
-
102
- # async def query_gemini_api(transcription):
103
- # """
104
- # Send transcription text to Gemini API and fetch structured recipe information (async function).
105
- # """
106
- # try:
107
- # # Define the structured prompt
108
- # prompt = (
109
- # "Analyze the provided cooking video transcription and extract the following structured information:\n"
110
- # "1. Recipe Name: Identify the name of the dish being prepared.\n"
111
- # "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
112
- # "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
113
- # "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
114
- # "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
115
- # "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
116
- # "7. Serving size: In count of people or portion size.\n"
117
- # "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
118
- # "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
119
- # f"Text: {transcription}\n"
120
- # )
121
-
122
- # # Prepare the payload and headers
123
- # payload = {
124
- # "contents": [
125
- # {
126
- # "parts": [
127
- # {"text": prompt}
128
- # ]
129
- # }
130
- # ]
131
- # }
132
- # headers = {"Content-Type": "application/json"}
133
-
134
- # # Send request to Gemini API asynchronously
135
- # async with aiohttp.ClientSession() as session:
136
- # async with session.post(
137
- # f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
138
- # json=payload,
139
- # headers=headers,
140
- # timeout=60 # 60 seconds timeout for the request
141
- # ) as response:
142
- # response.raise_for_status() # Raise error if response code is not 200
143
- # data = await response.json()
144
-
145
- # return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
146
-
147
- # except aiohttp.ClientError as e:
148
- # print(f"Error querying Gemini API: {e}")
149
- # return {"error": str(e)}
150
-
151
-
152
- # if __name__ == '__main__':
153
- # app.run(debug=True)
154
-
155
-
156
-
157
-
158
-
159
- # Above code is without polling and sleep
160
  import os
161
  import whisper
162
  import requests
163
- from flask import Flask, request, jsonify, render_template
164
- import tempfile
 
 
165
  import warnings
166
  warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
167
 
168
- app = Flask(__name__)
169
  print("APP IS RUNNING, ANIKET")
170
 
171
- # Gemini API settings
172
- from dotenv import load_dotenv
173
  # Load the .env file
174
  load_dotenv()
175
 
@@ -185,73 +26,72 @@ if not API_KEY:
185
  GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
186
  GEMINI_API_KEY = API_KEY
187
 
188
-
189
  # Load Whisper AI model at startup
190
  print("Loading Whisper AI model..., ANIKET")
191
  whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
192
  print("Whisper AI model loaded successfully, ANIKET")
193
 
194
 
195
- # Define the "/" endpoint for health check
196
  @app.route("/", methods=["GET"])
197
- def health_check():
198
  return jsonify({"status": "success", "message": "API is running successfully!"}), 200
199
 
 
200
  @app.route("/mbsa")
201
- def mbsa():
202
- return render_template("mbsa.html")
 
203
 
204
  @app.route('/process-audio', methods=['POST'])
205
- def process_audio():
206
  print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
207
- """
208
- Flask endpoint to process audio:
209
- 1. Transcribe provided audio file using Whisper AI.
210
- 2. Send transcription to Gemini API for recipe information extraction.
211
- 3. Return structured data in the response.
212
- """
213
 
214
  if 'audio' not in request.files:
215
  return jsonify({"error": "No audio file provided"}), 400
216
 
217
  audio_file = request.files['audio']
218
  print("AUDIO FILE NAME: ", audio_file)
219
-
220
  try:
221
  print("STARTING TRANSCRIPTION, ANIKET")
222
- # Step 1: Transcribe the uploaded audio file directly
223
- audio_file = request.files['audio']
224
- transcription = transcribe_audio(audio_file)
225
-
226
  print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
227
-
228
  if not transcription:
229
  return jsonify({"error": "Audio transcription failed"}), 500
230
-
231
  print("GOT THE transcription")
232
-
233
  print("Starting the GEMINI REQUEST TO STRUCTURE IT")
234
- # Step 2: Generate structured recipe information using Gemini API
235
- structured_data = query_gemini_api(transcription)
236
-
237
  print("GOT THE STRUCTURED DATA", structured_data)
238
  # Step 3: Return the structured data
239
  return jsonify(structured_data)
240
-
241
  except Exception as e:
242
  return jsonify({"error": str(e)}), 500
243
 
244
- def transcribe_audio(audio_path):
 
245
  """
246
- Transcribe audio using Whisper AI.
247
  """
248
  print("CAME IN THE transcribe audio function")
249
  try:
250
- # Transcribe audio using Whisper AI
251
- print("Transcribing audio...")
252
- result = whisper_model.transcribe(audio_path)
253
- print("THE RESULTS ARE", result)
254
-
 
 
 
 
255
  return result.get("text", "").strip()
256
 
257
  except Exception as e:
@@ -259,9 +99,9 @@ def transcribe_audio(audio_path):
259
  return None
260
 
261
 
262
- def query_gemini_api(transcription):
263
  """
264
- Send transcription text to Gemini API and fetch structured recipe information.
265
  """
266
  try:
267
  # Define the structured prompt
@@ -291,21 +131,20 @@ def query_gemini_api(transcription):
291
  }
292
  headers = {"Content-Type": "application/json"}
293
 
294
- # Send request to Gemini API and wait for the response
295
- print("Querying Gemini API...")
296
- response = requests.post(
297
- f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
298
- json=payload,
299
- headers=headers,
300
- timeout=60 # 60 seconds timeout for the request
301
- )
302
- response.raise_for_status()
 
303
 
304
- # Extract and return the structured data
305
- data = response.json()
306
  return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
307
 
308
- except requests.exceptions.RequestException as e:
309
  print(f"Error querying Gemini API: {e}")
310
  return {"error": str(e)}
311
 
@@ -317,6 +156,167 @@ if __name__ == '__main__':
317
 
318
 
319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
 
322
  # import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import whisper
3
  import requests
4
+ import asyncio
5
+ import aiohttp # For making async HTTP requests
6
+ from quart import Quart, request, jsonify, render_template
7
+ from dotenv import load_dotenv
8
  import warnings
9
  warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
10
 
11
+ app = Quart(__name__)
12
  print("APP IS RUNNING, ANIKET")
13
 
 
 
14
  # Load the .env file
15
  load_dotenv()
16
 
 
26
  GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
27
  GEMINI_API_KEY = API_KEY
28
 
 
29
  # Load Whisper AI model at startup
30
  print("Loading Whisper AI model..., ANIKET")
31
  whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
32
  print("Whisper AI model loaded successfully, ANIKET")
33
 
34
 
 
35
  @app.route("/", methods=["GET"])
36
+ async def health_check():
37
  return jsonify({"status": "success", "message": "API is running successfully!"}), 200
38
 
39
+
40
  @app.route("/mbsa")
41
+ async def mbsa():
42
+ return await render_template("mbsa.html")
43
+
44
 
45
  @app.route('/process-audio', methods=['POST'])
46
+ async def process_audio():
47
  print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
 
 
 
 
 
 
48
 
49
  if 'audio' not in request.files:
50
  return jsonify({"error": "No audio file provided"}), 400
51
 
52
  audio_file = request.files['audio']
53
  print("AUDIO FILE NAME: ", audio_file)
54
+
55
  try:
56
  print("STARTING TRANSCRIPTION, ANIKET")
57
+
58
+ # Step 1: Transcribe the uploaded audio file asynchronously
59
+ transcription = await transcribe_audio(audio_file)
60
+
61
  print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
62
+
63
  if not transcription:
64
  return jsonify({"error": "Audio transcription failed"}), 500
65
+
66
  print("GOT THE transcription")
67
+
68
  print("Starting the GEMINI REQUEST TO STRUCTURE IT")
69
+ # Step 2: Generate structured recipe information using Gemini API asynchronously
70
+ structured_data = await query_gemini_api(transcription)
71
+
72
  print("GOT THE STRUCTURED DATA", structured_data)
73
  # Step 3: Return the structured data
74
  return jsonify(structured_data)
75
+
76
  except Exception as e:
77
  return jsonify({"error": str(e)}), 500
78
 
79
+
80
+ async def transcribe_audio(audio_file):
81
  """
82
+ Transcribe audio using Whisper AI (async function).
83
  """
84
  print("CAME IN THE transcribe audio function")
85
  try:
86
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
87
+ audio_file.save(temp_audio_file.name)
88
+ print(f"Temporary audio file saved: {temp_audio_file.name}")
89
+
90
+ # Run Whisper transcription asynchronously
91
+ loop = asyncio.get_event_loop()
92
+ result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
93
+ print("THE RESULTS ARE", result)
94
+
95
  return result.get("text", "").strip()
96
 
97
  except Exception as e:
 
99
  return None
100
 
101
 
102
+ async def query_gemini_api(transcription):
103
  """
104
+ Send transcription text to Gemini API and fetch structured recipe information (async function).
105
  """
106
  try:
107
  # Define the structured prompt
 
131
  }
132
  headers = {"Content-Type": "application/json"}
133
 
134
+ # Send request to Gemini API asynchronously
135
+ async with aiohttp.ClientSession() as session:
136
+ async with session.post(
137
+ f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
138
+ json=payload,
139
+ headers=headers,
140
+ timeout=60 # 60 seconds timeout for the request
141
+ ) as response:
142
+ response.raise_for_status() # Raise error if response code is not 200
143
+ data = await response.json()
144
 
 
 
145
  return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
146
 
147
+ except aiohttp.ClientError as e:
148
  print(f"Error querying Gemini API: {e}")
149
  return {"error": str(e)}
150
 
 
156
 
157
 
158
 
159
+ # # Above code is without polling and sleep
160
+ # import os
161
+ # import whisper
162
+ # import requests
163
+ # from flask import Flask, request, jsonify, render_template
164
+ # import tempfile
165
+ # import warnings
166
+ # warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
167
+
168
+ # app = Flask(__name__)
169
+ # print("APP IS RUNNING, ANIKET")
170
+
171
+ # # Gemini API settings
172
+ # from dotenv import load_dotenv
173
+ # # Load the .env file
174
+ # load_dotenv()
175
+
176
+ # print("ENV LOADED, ANIKET")
177
+
178
+ # # Fetch the API key from the .env file
179
+ # API_KEY = os.getenv("FIRST_API_KEY")
180
+
181
+ # # Ensure the API key is loaded correctly
182
+ # if not API_KEY:
183
+ # raise ValueError("API Key not found. Make sure it is set in the .env file.")
184
+
185
+ # GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
186
+ # GEMINI_API_KEY = API_KEY
187
+
188
+
189
+ # # Load Whisper AI model at startup
190
+ # print("Loading Whisper AI model..., ANIKET")
191
+ # whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
192
+ # print("Whisper AI model loaded successfully, ANIKET")
193
+
194
+
195
+ # # Define the "/" endpoint for health check
196
+ # @app.route("/", methods=["GET"])
197
+ # def health_check():
198
+ # return jsonify({"status": "success", "message": "API is running successfully!"}), 200
199
+
200
+ # @app.route("/mbsa")
201
+ # def mbsa():
202
+ # return render_template("mbsa.html")
203
+
204
+ # @app.route('/process-audio', methods=['POST'])
205
+ # def process_audio():
206
+ # print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
207
+ # """
208
+ # Flask endpoint to process audio:
209
+ # 1. Transcribe provided audio file using Whisper AI.
210
+ # 2. Send transcription to Gemini API for recipe information extraction.
211
+ # 3. Return structured data in the response.
212
+ # """
213
+
214
+ # if 'audio' not in request.files:
215
+ # return jsonify({"error": "No audio file provided"}), 400
216
+
217
+ # audio_file = request.files['audio']
218
+ # print("AUDIO FILE NAME: ", audio_file)
219
+
220
+ # try:
221
+ # print("STARTING TRANSCRIPTION, ANIKET")
222
+ # # Step 1: Transcribe the uploaded audio file directly
223
+ # audio_file = request.files['audio']
224
+ # transcription = transcribe_audio(audio_file)
225
+
226
+ # print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
227
+
228
+ # if not transcription:
229
+ # return jsonify({"error": "Audio transcription failed"}), 500
230
+
231
+ # print("GOT THE transcription")
232
+
233
+ # print("Starting the GEMINI REQUEST TO STRUCTURE IT")
234
+ # # Step 2: Generate structured recipe information using Gemini API
235
+ # structured_data = query_gemini_api(transcription)
236
+
237
+ # print("GOT THE STRUCTURED DATA", structured_data)
238
+ # # Step 3: Return the structured data
239
+ # return jsonify(structured_data)
240
+
241
+ # except Exception as e:
242
+ # return jsonify({"error": str(e)}), 500
243
+
244
+ # def transcribe_audio(audio_path):
245
+ # """
246
+ # Transcribe audio using Whisper AI.
247
+ # """
248
+ # print("CAME IN THE transcribe audio function")
249
+ # try:
250
+ # # Transcribe audio using Whisper AI
251
+ # print("Transcribing audio...")
252
+ # result = whisper_model.transcribe(audio_path)
253
+ # print("THE RESULTS ARE", result)
254
+
255
+ # return result.get("text", "").strip()
256
+
257
+ # except Exception as e:
258
+ # print(f"Error in transcription: {e}")
259
+ # return None
260
+
261
+
262
+ # def query_gemini_api(transcription):
263
+ # """
264
+ # Send transcription text to Gemini API and fetch structured recipe information.
265
+ # """
266
+ # try:
267
+ # # Define the structured prompt
268
+ # prompt = (
269
+ # "Analyze the provided cooking video transcription and extract the following structured information:\n"
270
+ # "1. Recipe Name: Identify the name of the dish being prepared.\n"
271
+ # "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
272
+ # "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
273
+ # "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
274
+ # "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
275
+ # "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
276
+ # "7. Serving size: In count of people or portion size.\n"
277
+ # "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
278
+ # "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
279
+ # f"Text: {transcription}\n"
280
+ # )
281
+
282
+ # # Prepare the payload and headers
283
+ # payload = {
284
+ # "contents": [
285
+ # {
286
+ # "parts": [
287
+ # {"text": prompt}
288
+ # ]
289
+ # }
290
+ # ]
291
+ # }
292
+ # headers = {"Content-Type": "application/json"}
293
+
294
+ # # Send request to Gemini API and wait for the response
295
+ # print("Querying Gemini API...")
296
+ # response = requests.post(
297
+ # f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
298
+ # json=payload,
299
+ # headers=headers,
300
+ # timeout=60 # 60 seconds timeout for the request
301
+ # )
302
+ # response.raise_for_status()
303
+
304
+ # # Extract and return the structured data
305
+ # data = response.json()
306
+ # return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
307
+
308
+ # except requests.exceptions.RequestException as e:
309
+ # print(f"Error querying Gemini API: {e}")
310
+ # return {"error": str(e)}
311
+
312
+
313
+ # if __name__ == '__main__':
314
+ # app.run(debug=True)
315
+
316
+
317
+
318
+
319
+
320
 
321
 
322
  # import os