EladSpamson committed
Commit 57dbbfb · verified · 1 Parent(s): bbaa5e1

Update app.py

Files changed (1)
  1. app.py +34 -36
app.py CHANGED
@@ -9,13 +9,9 @@ import psutil
 from flask import Flask, request, jsonify
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
-###############################################################################
-# 1) Configure environment & set up model
-###############################################################################
-os.environ["HF_HOME"] = "/tmp/hf_cache"
-os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
-os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_cache"
-os.environ["XDG_CACHE_HOME"] = "/tmp"
+# GLOBAL concurrency lock or counter
+concurrent_requests = 0
+concurrent_requests_lock = threading.Lock()
 
 app = Flask(__name__)
 
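Note: the commit replaces the deleted cache-directory setup with a global counter guarded by a threading.Lock. The point of the lock is that the check-and-increment must be atomic across Flask's worker threads. A minimal standalone sketch of that gate pattern (try_claim_slot and release_slot are illustrative names, not part of this commit):

import threading

concurrent_requests = 0
concurrent_requests_lock = threading.Lock()

def try_claim_slot(limit=1):
    # Check and claim in one atomic step; an unlocked read-then-increment
    # could let two requests through at once.
    global concurrent_requests
    with concurrent_requests_lock:
        if concurrent_requests >= limit:
            return False
        concurrent_requests += 1
        return True

def release_slot():
    global concurrent_requests
    with concurrent_requests_lock:
        concurrent_requests -= 1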
@@ -26,50 +22,42 @@ model = WhisperForConditionalGeneration.from_pretrained(model_id)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
-# Force Hebrew transcription
 forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")
 
 WEBHOOK_URL = "https://hook.eu1.make.com/86zogci73u394k2uqpulp5yjjwgm8b9x"
 
-###############################################################################
-# 2) Background transcription function
-###############################################################################
 def transcribe_in_background(audio_url, file_id, company, user, file_name):
+    global concurrent_requests
     try:
-        # Download audio
+        # 1) Download audio
         r = requests.get(audio_url)
         audio_path = "/tmp/temp_audio.wav"
         with open(audio_path, "wb") as f:
             f.write(r.content)
 
-        # Load with librosa, limit to 1 hour
+        # 2) Load audio
         waveform, sr = librosa.load(audio_path, sr=16000)
         max_sec = 3600
         waveform = waveform[: sr * max_sec]
 
-        # Calculate callDuration
         call_duration = int(len(waveform) / sr)
 
-        # Split audio into 25-second chunks
+        # 3) Transcribe in chunks
         chunk_sec = 25
         chunk_size = sr * chunk_sec
         chunks = [waveform[i : i + chunk_size] for i in range(0, len(waveform), chunk_size)]
-
+
         partial_text = ""
         for chunk in chunks:
             inputs = processor(chunk, sampling_rate=sr, return_tensors="pt", padding=True)
             input_features = inputs.input_features.to(device)
 
             with torch.no_grad():
-                predicted_ids = model.generate(
-                    input_features,
-                    forced_decoder_ids=forced_decoder_ids
-                )
-
+                predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
             transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
             partial_text += transcription + "\n"
 
-        # Post final transcription back to Make.com
+        # 4) Post final transcription
         payload = {
             "Transcription": partial_text.strip(),
             "callDuration": call_duration,
@@ -81,6 +69,7 @@ def transcribe_in_background(audio_url, file_id, company, user, file_name):
         requests.post(WEBHOOK_URL, json=payload)
 
     except Exception as e:
+        # 5) Handle errors
         error_payload = {
             "error": str(e),
             "fileId": file_id,
@@ -90,23 +79,35 @@ def transcribe_in_background(audio_url, file_id, company, user, file_name):
         }
         requests.post(WEBHOOK_URL, json=error_payload)
 
-###############################################################################
-# 3) Flask route: CPU check and transcription
-###############################################################################
+    finally:
+        # Always decrement concurrency, even on error
+        with concurrent_requests_lock:
+            global concurrent_requests
+            concurrent_requests -= 1
+
 @app.route("/transcribe", methods=["POST"])
 def transcribe_endpoint():
-    # 1) Check CPU usage
-    cpu_usage = psutil.cpu_percent(interval=0.1)  # measure CPU usage over 0.1s
-    if cpu_usage > 40.0:
-        return jsonify({"error": "CPU is busy", "cpuUsage": cpu_usage}), 503
-
-    # 2) Get JSON data
+    global concurrent_requests
+
+    # 1) Check concurrency
+    with concurrent_requests_lock:
+        if concurrent_requests >= 1:
+            # We only allow ONE job at a time
+            return jsonify({"error": "Server is busy with another transcription"}), 503
+
+        # If it's free, claim the slot
+        concurrent_requests += 1
+
+    # 2) Parse request
     data = request.get_json()
     audio_url = data.get("audio_url")
    if not audio_url:
+        # Since we've already claimed concurrency=1, we should free it
+        with concurrent_requests_lock:
+            concurrent_requests -= 1
         return jsonify({"error": "Missing 'audio_url' in request"}), 400
 
-    # 3) Read headers
+    # 3) Read custom headers
     file_id = request.headers.get("fileId", "")
     company = request.headers.get("company", "")
     user = request.headers.get("user", "")
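The lifecycle here is easy to misread: the slot is claimed in the request handler but released in the background thread's finally, so errors cannot leak the slot. A standalone miniature of the same flow, using threading.Semaphore in place of the commit's counter-plus-lock (an equivalent substitution, not the commit's code):

import threading, time

busy = threading.Semaphore(1)       # one slot; same invariant as the counter

def worker():
    try:
        time.sleep(2)               # stand-in for the transcription work
    finally:
        busy.release()              # always hand the slot back

def handle_request():
    if not busy.acquire(blocking=False):
        return "503 busy"
    threading.Thread(target=worker).start()
    return "202 accepted"

print(handle_request())             # 202 accepted
print(handle_request())             # 503 busy while the first job runs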
@@ -119,14 +120,11 @@ def transcribe_endpoint():
     )
     thread.start()
 
-    # 5) Return immediate response
+    # 5) Return an immediate response
     return jsonify({
         "status": "Received. Transcription in progress.",
         "note": "Results will be sent via webhook once done."
     }), 202
 
-###############################################################################
-# 4) Run app locally; HF Spaces uses gunicorn
-###############################################################################
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)
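After this commit, a client gets 202 when the slot is free and 503 when a job is already running. A sketch of a call (the host and the fileName header name are assumptions; the code reading fileName sits outside this diff):

import requests

resp = requests.post(
    "http://localhost:7860/transcribe",
    json={"audio_url": "https://example.com/call.wav"},       # placeholder URL
    headers={"fileId": "123", "company": "acme", "user": "dana", "fileName": "call.wav"},
)
print(resp.status_code, resp.json())
# 202 -> {"status": "Received. Transcription in progress.", ...}
# 503 -> {"error": "Server is busy with another transcription"}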
 