GoodML committed on
Commit
985ead6
·
verified ·
1 Parent(s): aaac38d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +300 -25
app.py CHANGED
@@ -1,7 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import requests
3
  import cv2
4
  import re
 
5
  from flask import Flask, request, jsonify, render_template
6
  from deepgram import DeepgramClient, PrerecordedOptions
7
  from dotenv import load_dotenv
@@ -68,9 +352,6 @@ def transcribe_audio(wav_file_path):
68
 
69
  # Check if the response is valid
70
  if response:
71
- # print("Request successful! Processing response.")
72
-
73
- # Convert response to JSON string
74
  try:
75
  data_str = response.to_json(indent=4)
76
  except AttributeError as e:
@@ -89,11 +370,10 @@ def transcribe_audio(wav_file_path):
89
  return {"status": "error", "message": f"Error extracting transcript: {e}"}
90
 
91
  print(f"Transcript obtained: {transcript}")
92
- # Step: Save the transcript to a text file
93
  transcript_file_path = "transcript_from_transcribe_audio.txt"
94
  with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
95
  transcript_file.write(transcript)
96
- # print(f"Transcript saved to file: {transcript_file_path}")
97
 
98
  return transcript
99
  else:
@@ -166,6 +446,11 @@ def get_information_from_video_using_OCR(video_path, interval=1):
166
  return extracted_text
167
 
168
 
 
 
 
 
 
169
 
170
 
171
  @app.route('/process-video', methods=['POST'])
@@ -177,33 +462,30 @@ def process_video():
177
  temp_video_path = None
178
 
179
  try:
180
- # Step 1: Download the WAV file from the provided URL
181
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
182
  temp_video_path = temp_video_file.name
183
  download_video(video_url, temp_video_path)
184
- interval = 1
185
- # Step 2: get the information from the downloaded MP4 file synchronously
186
- video_info = get_information_from_video_using_OCR(temp_video_path, interval)
187
 
188
  if not video_info:
189
  video_info = ""
190
 
191
-
192
-
193
- # Step 2: Convert the MP4 to WAV
194
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
195
  temp_wav_path = temp_wav_file.name
196
  convert_mp4_to_wav(temp_video_path, temp_wav_path)
197
 
 
198
  audio_info = transcribe_audio(temp_wav_path)
199
 
200
- # If no transcription present, use an empty string
201
  if not audio_info:
202
  audio_info = ""
203
 
204
-
205
-
206
- # Step 3: Generate structured recipe information using Gemini API synchronously
207
  structured_data = query_gemini_api(video_info, audio_info)
208
 
209
  return jsonify(structured_data)
@@ -212,14 +494,10 @@ def process_video():
212
  return jsonify({"error": str(e)}), 500
213
 
214
  finally:
215
- # Clean up temporary audio file
216
  if temp_video_path and os.path.exists(temp_video_path):
217
  os.remove(temp_video_path)
218
- print(f"Temporary audio file deleted: {temp_video_path}")
219
-
220
-
221
-
222
-
223
 
224
 
225
  def query_gemini_api(video_transcription, audio_transcription):
@@ -239,10 +517,8 @@ def query_gemini_api(video_transcription, audio_transcription):
239
  "7. Serving size: In count of people or portion size.\n"
240
  "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
241
  "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
242
- "Also, make sure not to provide anything else or any other information or warning or text apart from the above things mentioned."
243
  f"Text: {audio_transcription}\n"
244
  f"Text: {video_transcription}\n"
245
-
246
  )
247
 
248
  # Prepare the payload and headers
@@ -278,4 +554,3 @@ def query_gemini_api(video_transcription, audio_transcription):
278
 
279
  if __name__ == '__main__':
280
  app.run(debug=True)
281
-
 
1
+ # import os
2
+ # import requests
3
+ # import cv2
4
+ # import re
5
+ # from flask import Flask, request, jsonify, render_template
6
+ # from deepgram import DeepgramClient, PrerecordedOptions
7
+ # from dotenv import load_dotenv
8
+ # import tempfile
9
+ # import json
10
+ # import subprocess
11
+
12
+
13
+ # import warnings
14
+ # warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
15
+
16
+ # app = Flask(__name__)
17
+ # print("APP IS RUNNING, ANIKET")
18
+
19
+ # # Load the .env file
20
+ # load_dotenv()
21
+
22
+ # print("ENV LOADED, ANIKET")
23
+
24
+ # # Fetch the API key from the .env file
25
+ # API_KEY = os.getenv("FIRST_API_KEY")
26
+ # DEEPGRAM_API_KEY = os.getenv("SECOND_API_KEY")
27
+
28
+ # # Ensure the API key is loaded correctly
29
+ # if not API_KEY:
30
+ # raise ValueError("API Key not found. Make sure it is set in the .env file.")
31
+
32
+ # if not DEEPGRAM_API_KEY:
33
+ # raise ValueError("DEEPGRAM_API_KEY not found. Make sure it is set in the .env file.")
34
+
35
+ # GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
36
+ # GEMINI_API_KEY = API_KEY
37
+
38
+ # @app.route("/", methods=["GET"])
39
+ # def health_check():
40
+ # return jsonify({"status": "success", "message": "API is running successfully!"}), 200
41
+
42
+
43
+ # def transcribe_audio(wav_file_path):
44
+ # """
45
+ # Transcribe audio from a video file using Deepgram API synchronously.
46
+
47
+ # Args:
48
+ # wav_file_path (str): Path to save the converted WAV file.
49
+ # Returns:
50
+ # dict: A dictionary containing status, transcript, or error message.
51
+ # """
52
+ # print("Entered the transcribe_audio function")
53
+ # try:
54
+ # # Initialize Deepgram client
55
+ # deepgram = DeepgramClient(DEEPGRAM_API_KEY)
56
+
57
+ # # Open the converted WAV file
58
+ # with open(wav_file_path, 'rb') as buffer_data:
59
+ # payload = {'buffer': buffer_data}
60
+
61
+ # # Configure transcription options
62
+ # options = PrerecordedOptions(
63
+ # smart_format=True, model="nova-2", language="en-US"
64
+ # )
65
+
66
+ # # Transcribe the audio
67
+ # response = deepgram.listen.prerecorded.v('1').transcribe_file(payload, options)
68
+
69
+ # # Check if the response is valid
70
+ # if response:
71
+ # # print("Request successful! Processing response.")
72
+
73
+ # # Convert response to JSON string
74
+ # try:
75
+ # data_str = response.to_json(indent=4)
76
+ # except AttributeError as e:
77
+ # return {"status": "error", "message": f"Error converting response to JSON: {e}"}
78
+
79
+ # # Parse the JSON string to a Python dictionary
80
+ # try:
81
+ # data = json.loads(data_str)
82
+ # except json.JSONDecodeError as e:
83
+ # return {"status": "error", "message": f"Error parsing JSON string: {e}"}
84
+
85
+ # # Extract the transcript
86
+ # try:
87
+ # transcript = data["results"]["channels"][0]["alternatives"][0]["transcript"]
88
+ # except KeyError as e:
89
+ # return {"status": "error", "message": f"Error extracting transcript: {e}"}
90
+
91
+ # print(f"Transcript obtained: {transcript}")
92
+ # # Step: Save the transcript to a text file
93
+ # transcript_file_path = "transcript_from_transcribe_audio.txt"
94
+ # with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
95
+ # transcript_file.write(transcript)
96
+ # # print(f"Transcript saved to file: {transcript_file_path}")
97
+
98
+ # return transcript
99
+ # else:
100
+ # return {"status": "error", "message": "Invalid response from Deepgram."}
101
+
102
+ # except FileNotFoundError:
103
+ # return {"status": "error", "message": f"Video file not found: {wav_file_path}"}
104
+ # except Exception as e:
105
+ # return {"status": "error", "message": f"Unexpected error: {e}"}
106
+ # finally:
107
+ # # Clean up the temporary WAV file
108
+ # if os.path.exists(wav_file_path):
109
+ # os.remove(wav_file_path)
110
+ # print(f"Temporary WAV file deleted: {wav_file_path}")
111
+
112
+
113
+
114
+ # def download_video(url, temp_video_path):
115
+ # """Download video (MP4 format) from the given URL and save it to temp_video_path."""
116
+ # response = requests.get(url, stream=True)
117
+ # if response.status_code == 200:
118
+ # with open(temp_video_path, 'wb') as f:
119
+ # for chunk in response.iter_content(chunk_size=1024):
120
+ # f.write(chunk)
121
+ # print(f"Audio downloaded successfully to {temp_video_path}")
122
+ # else:
123
+ # raise Exception(f"Failed to download audio, status code: {response.status_code}")
124
+
125
+
126
+ # def preprocess_frame(frame):
127
+ # """Preprocess the frame for better OCR accuracy."""
128
+ # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
129
+ # denoised = cv2.medianBlur(gray, 3)
130
+ # _, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
131
+ # return thresh
132
+
133
+ # def clean_ocr_text(text):
134
+ # """Clean the OCR output by removing noise and unwanted characters."""
135
+ # cleaned_text = re.sub(r'[^A-Za-z0-9\s,.!?-]', '', text)
136
+ # cleaned_text = '\n'.join([line.strip() for line in cleaned_text.splitlines() if len(line.strip()) > 2])
137
+ # return cleaned_text
138
+
139
+ # def get_information_from_video_using_OCR(video_path, interval=1):
140
+ # """Extract text from video frames using OCR and return the combined text content."""
141
+ # cap = cv2.VideoCapture(video_path)
142
+ # fps = int(cap.get(cv2.CAP_PROP_FPS))
143
+ # frame_interval = interval * fps
144
+ # frame_count = 0
145
+ # extracted_text = ""
146
+
147
+ # print("Starting text extraction from video...")
148
+
149
+ # while cap.isOpened():
150
+ # ret, frame = cap.read()
151
+ # if not ret:
152
+ # break
153
+
154
+ # if frame_count % frame_interval == 0:
155
+ # preprocessed_frame = preprocess_frame(frame)
156
+ # text = pytesseract.image_to_string(preprocessed_frame, lang='eng', config='--psm 6 --oem 3')
157
+ # cleaned_text = clean_ocr_text(text)
158
+ # if cleaned_text:
159
+ # extracted_text += cleaned_text + "\n\n"
160
+ # print(f"Text found at frame {frame_count}: {cleaned_text[:50]}...")
161
+
162
+ # frame_count += 1
163
+
164
+ # cap.release()
165
+ # print("Text extraction completed.")
166
+ # return extracted_text
167
+
168
+
169
+
170
+
171
+ # @app.route('/process-video', methods=['POST'])
172
+ # def process_video():
173
+ # if 'videoUrl' not in request.json:
174
+ # return jsonify({"error": "No video URL provided"}), 400
175
+
176
+ # video_url = request.json['videoUrl']
177
+ # temp_video_path = None
178
+
179
+ # try:
180
+ # # Step 1: Download the WAV file from the provided URL
181
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
182
+ # temp_video_path = temp_video_file.name
183
+ # download_video(video_url, temp_video_path)
184
+ # interval = 1
185
+ # # Step 2: get the information from the downloaded MP4 file synchronously
186
+ # video_info = get_information_from_video_using_OCR(temp_video_path, interval)
187
+
188
+ # if not video_info:
189
+ # video_info = ""
190
+
191
+
192
+
193
+ # # Step 2: Convert the MP4 to WAV
194
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
195
+ # temp_wav_path = temp_wav_file.name
196
+ # convert_mp4_to_wav(temp_video_path, temp_wav_path)
197
+
198
+ # audio_info = transcribe_audio(temp_wav_path)
199
+
200
+ # # If no transcription present, use an empty string
201
+ # if not audio_info:
202
+ # audio_info = ""
203
+
204
+
205
+
206
+ # # Step 3: Generate structured recipe information using Gemini API synchronously
207
+ # structured_data = query_gemini_api(video_info, audio_info)
208
+
209
+ # return jsonify(structured_data)
210
+
211
+ # except Exception as e:
212
+ # return jsonify({"error": str(e)}), 500
213
+
214
+ # finally:
215
+ # # Clean up temporary audio file
216
+ # if temp_video_path and os.path.exists(temp_video_path):
217
+ # os.remove(temp_video_path)
218
+ # print(f"Temporary audio file deleted: {temp_video_path}")
219
+
220
+
221
+
222
+
223
+
224
+
225
+ # def query_gemini_api(video_transcription, audio_transcription):
226
+ # """
227
+ # Send transcription text to Gemini API and fetch structured recipe information synchronously.
228
+ # """
229
+ # try:
230
+ # # Define the structured prompt
231
+ # prompt = (
232
+ # "Analyze the provided cooking video and audio transcription combined and based on the combined information extract the following structured information:\n"
233
+ # "1. Recipe Name: Identify the name of the dish being prepared.\n"
234
+ # "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
235
+ # "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
236
+ # "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
237
+ # "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
238
+ # "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
239
+ # "7. Serving size: In count of people or portion size.\n"
240
+ # "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
241
+ # "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
242
+ # "Also, make sure not to provide anything else or any other information or warning or text apart from the above things mentioned."
243
+ # f"Text: {audio_transcription}\n"
244
+ # f"Text: {video_transcription}\n"
245
+
246
+ # )
247
+
248
+ # # Prepare the payload and headers
249
+ # payload = {
250
+ # "contents": [
251
+ # {
252
+ # "parts": [
253
+ # {"text": prompt}
254
+ # ]
255
+ # }
256
+ # ]
257
+ # }
258
+ # headers = {"Content-Type": "application/json"}
259
+
260
+ # # Send request to Gemini API synchronously
261
+ # response = requests.post(
262
+ # f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
263
+ # json=payload,
264
+ # headers=headers,
265
+ # )
266
+
267
+ # # Raise error if response code is not 200
268
+ # response.raise_for_status()
269
+
270
+ # data = response.json()
271
+
272
+ # return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
273
+
274
+ # except requests.exceptions.RequestException as e:
275
+ # print(f"Error querying Gemini API: {e}")
276
+ # return {"error": str(e)}
277
+
278
+
279
+ # if __name__ == '__main__':
280
+ # app.run(debug=True)
281
+
282
+
283
+
284
  import os
285
  import requests
286
  import cv2
287
  import re
288
+ import pytesseract
289
  from flask import Flask, request, jsonify, render_template
290
  from deepgram import DeepgramClient, PrerecordedOptions
291
  from dotenv import load_dotenv
 
352
 
353
  # Check if the response is valid
354
  if response:
 
 
 
355
  try:
356
  data_str = response.to_json(indent=4)
357
  except AttributeError as e:
 
370
  return {"status": "error", "message": f"Error extracting transcript: {e}"}
371
 
372
  print(f"Transcript obtained: {transcript}")
373
+ # Save the transcript to a text file
374
  transcript_file_path = "transcript_from_transcribe_audio.txt"
375
  with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
376
  transcript_file.write(transcript)
 
377
 
378
  return transcript
379
  else:
 
446
  return extracted_text
447
 
448
 
449
+ def convert_mp4_to_wav(mp4_path, wav_path):
450
+ """Convert an MP4 file to a WAV file."""
451
+ command = f"ffmpeg -i {mp4_path} -vn -acodec pcm_s16le -ar 44100 -ac 2 {wav_path}"
452
+ subprocess.run(command, shell=True, check=True)
453
+ print(f"MP4 file converted to WAV: {wav_path}")
454
 
455
 
456
  @app.route('/process-video', methods=['POST'])
 
462
  temp_video_path = None
463
 
464
  try:
465
+ # Step 1: Download the MP4 file from the provided URL
466
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
467
  temp_video_path = temp_video_file.name
468
  download_video(video_url, temp_video_path)
469
+
470
+ # Step 2: Get the information from the downloaded MP4 file synchronously
471
+ video_info = get_information_from_video_using_OCR(temp_video_path, interval=1)
472
 
473
  if not video_info:
474
  video_info = ""
475
 
476
+ # Step 3: Convert the MP4 to WAV
 
 
477
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
478
  temp_wav_path = temp_wav_file.name
479
  convert_mp4_to_wav(temp_video_path, temp_wav_path)
480
 
481
+ # Step 4: Transcribe the audio
482
  audio_info = transcribe_audio(temp_wav_path)
483
 
484
+ # If no transcription is present, use an empty string
485
  if not audio_info:
486
  audio_info = ""
487
 
488
+ # Step 5: Generate structured recipe information using Gemini API synchronously
 
 
489
  structured_data = query_gemini_api(video_info, audio_info)
490
 
491
  return jsonify(structured_data)
 
494
  return jsonify({"error": str(e)}), 500
495
 
496
  finally:
497
+ # Clean up temporary video file
498
  if temp_video_path and os.path.exists(temp_video_path):
499
  os.remove(temp_video_path)
500
+ print(f"Temporary video file deleted: {temp_video_path}")
 
 
 
 
501
 
502
 
503
  def query_gemini_api(video_transcription, audio_transcription):
 
517
  "7. Serving size: In count of people or portion size.\n"
518
  "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
519
  "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
 
520
  f"Text: {audio_transcription}\n"
521
  f"Text: {video_transcription}\n"
 
522
  )
523
 
524
  # Prepare the payload and headers
 
554
 
555
  if __name__ == '__main__':
556
  app.run(debug=True)