GoodML committed on
Commit
d85921f
·
verified ·
1 Parent(s): bb47241

some threading included

Browse files
Files changed (1) hide show
  1. app.py +39 -74
app.py CHANGED
@@ -4,16 +4,15 @@ import whisper
4
  import requests
5
  from flask import Flask, request, jsonify, send_file
6
  import tempfile
 
 
 
 
7
 
8
  app = Flask(__name__)
9
 
10
  # Gemini API settings
11
- from dotenv import load_dotenv
12
- import requests
13
- # Load the .env file
14
  load_dotenv()
15
-
16
- # Fetch the API key from the .env file
17
  API_KEY = os.getenv("FIRST_API_KEY")
18
 
19
  # Ensure the API key is loaded correctly
@@ -25,27 +24,45 @@ GEMINI_API_KEY = API_KEY
25
 
26
  # Load Whisper AI model at startup
27
  print("Loading Whisper AI model...")
28
- whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
29
  print("Whisper AI model loaded successfully.")
30
 
31
-
32
  # Define the "/" endpoint for health check
33
  @app.route("/", methods=["GET"])
34
  def health_check():
35
  return jsonify({"status": "success", "message": "API is running successfully!"}), 200
36
 
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  @app.route('/process-video', methods=['POST'])
40
  def process_video():
41
- """
42
- Flask endpoint to process video:
43
- 1. Extract audio and transcribe using Whisper AI.
44
- 2. Send transcription to Gemini API for recipe information extraction.
45
- 3. Return structured data in the response.
46
- """
47
  if 'video' not in request.files:
48
- return jsonify({"error1": "No video file provided"}), 400
49
 
50
  video_file = request.files['video']
51
 
@@ -55,74 +72,28 @@ def process_video():
55
  video_file.save(temp_video_file.name)
56
  print(f"Video file saved: {temp_video_file.name}")
57
 
58
- # Extract audio and transcribe using Whisper AI
59
- transcription = transcribe_audio(temp_video_file.name)
60
-
61
- if not transcription:
62
- return jsonify({"error2": "Audio transcription failed"}), 500
63
 
64
- # Generate structured recipe information using Gemini API
65
- structured_data = query_gemini_api(transcription)
66
-
67
- return jsonify(structured_data)
68
 
69
  except Exception as e:
70
- return jsonify({"error3": str(e)}), 500
71
 
72
- finally:
73
- # Clean up temporary files
74
- if os.path.exists(temp_video_file.name):
75
- os.remove(temp_video_file.name)
76
-
77
-
78
- # def transcribe_audio(video_path):
79
- # """
80
- # Extract audio from video file and transcribe using Whisper AI.
81
- # """
82
- # try:
83
- # # Extract audio using ffmpeg
84
- # audio_path = video_path.replace(".mp4", ".wav")
85
- # command = [
86
- # "ffmpeg",
87
- # "-i", video_path,
88
- # "-q:a", "0",
89
- # "-map", "a",
90
- # audio_path
91
- # ]
92
- # subprocess.run(command, check=True)
93
- # print(f"Audio extracted to: {audio_path}")
94
-
95
- # # Transcribe audio using Whisper AI
96
- # print("Transcribing audio...")
97
- # result = whisper_model.transcribe(audio_path)
98
-
99
- # # Clean up audio file after transcription
100
- # if os.path.exists(audio_path):
101
- # os.remove(audio_path)
102
-
103
- # return result.get("text", "").strip()
104
-
105
- # except Exception as e:
106
- # print(f"Error in transcription: {e}")
107
- # return None
108
 
109
  def transcribe_audio(video_path):
110
  """
111
  Transcribe audio directly from a video file using Whisper AI.
112
  """
113
  try:
114
- # Transcribe audio from video directly using Whisper AI
115
  print(f"Transcribing video: {video_path}")
116
  result = whisper_model.transcribe(video_path)
117
-
118
  return result['text']
119
-
120
  except Exception as e:
121
- print(f"Error in transcription4: {e}")
122
  return None
123
 
124
 
125
-
126
  def query_gemini_api(transcription):
127
  """
128
  Send transcription text to Gemini API and fetch structured recipe information.
@@ -143,20 +114,14 @@ def query_gemini_api(transcription):
143
  f"Text: {transcription}\n"
144
  )
145
 
146
- # Prepare the payload and headers
147
  payload = {
148
  "contents": [
149
- {
150
- "parts": [
151
- {"text": prompt}
152
- ]
153
- }
154
  ]
155
  }
156
  headers = {"Content-Type": "application/json"}
157
 
158
  # Send request to Gemini API
159
- print("Querying Gemini API...")
160
  response = requests.post(
161
  f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
162
  json=payload,
@@ -170,8 +135,8 @@ def query_gemini_api(transcription):
170
 
171
  except requests.exceptions.RequestException as e:
172
  print(f"Error querying Gemini API: {e}")
173
- return {"error5": str(e)}
174
 
175
 
176
  if __name__ == '__main__':
177
- app.run(debug=True)
 
4
  import requests
5
  from flask import Flask, request, jsonify, send_file
6
  import tempfile
7
+ import warnings
8
+
9
+ warnings.filterwarnings("ignore", category=UserWarning, module="whisper")
10
+
11
 
12
  app = Flask(__name__)
13
 
14
  # Gemini API settings
 
 
 
15
  load_dotenv()
 
 
16
  API_KEY = os.getenv("FIRST_API_KEY")
17
 
18
  # Ensure the API key is loaded correctly
 
24
 
25
  # Load Whisper AI model at startup
26
  print("Loading Whisper AI model...")
27
+ whisper_model = whisper.load_model("base")
28
  print("Whisper AI model loaded successfully.")
29
 
 
30
  # Define the "/" endpoint for health check
31
  @app.route("/", methods=["GET"])
32
  def health_check():
33
  return jsonify({"status": "success", "message": "API is running successfully!"}), 200
34
 
35
 
36
def process_video_in_background(video_file, temp_video_file_name):
    """
    Transcribe a saved video and query the Gemini API off the request thread.

    This function is executed in a separate thread to handle the long-running
    video processing tasks such as transcription and querying the Gemini API.

    Args:
        video_file: The uploaded file object from the request. It is not read
            here (the work is done on the saved temp file); the parameter is
            kept so existing callers keep working.
        temp_video_file_name: Path of the temporary file holding the uploaded
            video. The file is deleted when processing ends, whether it
            succeeded or failed.

    Returns:
        None. Results and failures are only reported through ``print``.
    """
    try:
        transcription = transcribe_audio(temp_video_file_name)

        # transcribe_audio returns None on failure; an empty transcript is
        # treated the same way because there is nothing to send to Gemini.
        if not transcription:
            print("Audio transcription failed")
            return

        structured_data = query_gemini_api(transcription)

        # Send structured data back or store it in a database, depending on your use case
        print("Processing complete. Structured data:", structured_data)

    except Exception as e:
        # Outermost boundary of the worker: report the failure instead of
        # letting the exception escape the thread.
        print(f"Error processing video: {e}")

    finally:
        # Clean up the temporary file. Delete directly instead of checking
        # for existence first, so a file removed in the meantime cannot raise
        # between the check and the delete.
        try:
            os.remove(temp_video_file_name)
        except FileNotFoundError:
            pass
60
+
61
 
62
  @app.route('/process-video', methods=['POST'])
63
  def process_video():
 
 
 
 
 
 
64
  if 'video' not in request.files:
65
+ return jsonify({"error": "No video file provided"}), 400
66
 
67
  video_file = request.files['video']
68
 
 
72
  video_file.save(temp_video_file.name)
73
  print(f"Video file saved: {temp_video_file.name}")
74
 
75
+ # Start the video processing in a background thread
76
+ threading.Thread(target=process_video_in_background, args=(video_file, temp_video_file.name)).start()
 
 
 
77
 
78
+ return jsonify({"message": "Video is being processed in the background."}), 202
 
 
 
79
 
80
  except Exception as e:
81
+ return jsonify({"error": str(e)}), 500
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  def transcribe_audio(video_path):
85
  """
86
  Transcribe audio directly from a video file using Whisper AI.
87
  """
88
  try:
 
89
  print(f"Transcribing video: {video_path}")
90
  result = whisper_model.transcribe(video_path)
 
91
  return result['text']
 
92
  except Exception as e:
93
+ print(f"Error in transcription: {e}")
94
  return None
95
 
96
 
 
97
  def query_gemini_api(transcription):
98
  """
99
  Send transcription text to Gemini API and fetch structured recipe information.
 
114
  f"Text: {transcription}\n"
115
  )
116
 
 
117
  payload = {
118
  "contents": [
119
+ {"parts": [{"text": prompt}]}
 
 
 
 
120
  ]
121
  }
122
  headers = {"Content-Type": "application/json"}
123
 
124
  # Send request to Gemini API
 
125
  response = requests.post(
126
  f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
127
  json=payload,
 
135
 
136
  except requests.exceptions.RequestException as e:
137
  print(f"Error querying Gemini API: {e}")
138
+ return {"error": str(e)}
139
 
140
 
141
  if __name__ == '__main__':
142
+ app.run(debug=True)