EladSpamson committed
Commit 965be1d · verified · 1 Parent(s): 57b876b

Update app.py

Files changed (1)
  1. app.py +82 -53
app.py CHANGED
@@ -1,21 +1,24 @@
 import os
-
-# Environment variables to avoid permission issues
-os.environ["HF_HOME"] = "/tmp/hf_cache"
-os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
-os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_cache"
-os.environ["XDG_CACHE_HOME"] = "/tmp"
-
-from flask import Flask, request, jsonify, Response
 import json
 import requests
+import threading
 import torch
 import librosa
+
+from flask import Flask, request, jsonify
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
+###############################################################################
+# 1) Configure environment to avoid permission issues & set up model
+###############################################################################
+os.environ["HF_HOME"] = "/tmp/hf_cache"
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
+os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_cache"
+os.environ["XDG_CACHE_HOME"] = "/tmp"
+
 app = Flask(__name__)
 
-# Use your custom Hebrew Whisper model (example: ivrit-ai/whisper-large-v3-turbo)
+# Example: your custom Hebrew model
 model_id = "ivrit-ai/whisper-large-v3-turbo"
 processor = WhisperProcessor.from_pretrained(model_id)
 model = WhisperForConditionalGeneration.from_pretrained(model_id)
@@ -23,44 +26,66 @@ model = WhisperForConditionalGeneration.from_pretrained(model_id)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
-# Force Hebrew to skip auto-detect
+# Force Hebrew transcription (skip auto-detect)
 forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")
 
-def transcribe_audio(audio_url):
-    # 1) Download audio file
-    response = requests.get(audio_url)
-    audio_path = "/tmp/temp_audio.wav"
-    with open(audio_path, "wb") as f:
-        f.write(response.content)
-
-    # 2) Load audio with librosa
-    waveform, sr = librosa.load(audio_path, sr=16000)
-
-    # 3) Limit to 1 hour
-    waveform = waveform[: sr * 3600]
-
-    # 4) Split into 25-second chunks
-    chunk_sec = 25
-    chunk_size = sr * chunk_sec
-    chunks = [waveform[i : i + chunk_size] for i in range(0, len(waveform), chunk_size)]
-
-    partial_text = ""
-    for chunk in chunks:
-        inputs = processor(chunk, sampling_rate=sr, return_tensors="pt", padding=True)
-        input_features = inputs.input_features.to(device)
-
-        # Generate forced-Hebrew transcription
-        with torch.no_grad():
-            predicted_ids = model.generate(
-                input_features,
-                forced_decoder_ids=forced_decoder_ids
-            )
-
-        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-        partial_text += transcription + "\n"
-
-    return partial_text.strip()
-
+# Where we send the final transcription
+WEBHOOK_URL = "https://hook.eu1.make.com/86zogci73u394k2uqpulp5yjjwgm8b9x"
+
+###############################################################################
+# 2) Background transcription function
+###############################################################################
+def transcribe_in_background(audio_url):
+    """
+    Called by a background thread. Downloads & transcribes audio,
+    then sends results to your Make.com webhook.
+    """
+    try:
+        # Download audio
+        r = requests.get(audio_url)
+        audio_path = "/tmp/temp_audio.wav"
+        with open(audio_path, "wb") as f:
+            f.write(r.content)
+
+        # Load with librosa
+        waveform, sr = librosa.load(audio_path, sr=16000)
+
+        # Optional limit ~1 hour
+        max_sec = 3600
+        waveform = waveform[: sr * max_sec]
+
+        # Split audio into 25-second chunks
+        chunk_sec = 25
+        chunk_size = sr * chunk_sec
+        chunks = [waveform[i : i + chunk_size] for i in range(0, len(waveform), chunk_size)]
+
+        partial_text = ""
+        for chunk in chunks:
+            inputs = processor(chunk, sampling_rate=sr, return_tensors="pt", padding=True)
+            input_features = inputs.input_features.to(device)
+
+            with torch.no_grad():
+                predicted_ids = model.generate(
+                    input_features,
+                    forced_decoder_ids=forced_decoder_ids
+                )
+
+            transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+            partial_text += transcription + "\n"
+
+        # Post final transcription back to Make.com
+        payload = {"Transcription": partial_text.strip()}
+        requests.post(WEBHOOK_URL, json=payload)
+
+    except Exception as e:
+        # In case of errors, notify the webhook
+        error_payload = {"error": str(e)}
+        requests.post(WEBHOOK_URL, json=error_payload)
+
+
+###############################################################################
+# 3) Flask route: returns immediately, does the heavy lifting in a thread
+###############################################################################
 @app.route("/transcribe", methods=["POST"])
 def transcribe_endpoint():
     data = request.get_json()
@@ -68,15 +93,19 @@ def transcribe_endpoint():
     if not audio_url:
         return jsonify({"error": "Missing 'audio_url' in request"}), 400
 
-    text = transcribe_audio(audio_url)
+    # Spawn a thread to handle transcription & webhook
+    thread = threading.Thread(target=transcribe_in_background, args=(audio_url,))
+    thread.start()
+
+    # Immediately return a JSON response to Make.com
+    return jsonify({
+        "status": "Received. Transcription in progress.",
+        "note": "Results will be sent via webhook once done."
+    }), 202
 
-    # Return Hebrew characters directly
-    payload = {"Transcription": text}
-    return Response(
-        json.dumps(payload, ensure_ascii=False),
-        status=200,
-        mimetype="application/json; charset=utf-8"
-    )
 
+###############################################################################
+# 4) Run app if local, else Hugging Face will use gunicorn.
+###############################################################################
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)
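For scale, the chunking in the new transcribe_in_background works out as follows: at sr = 16000 and chunk_sec = 25, each chunk is 16000 × 25 = 400,000 samples, and the one-hour cap (max_sec = 3600) allows at most 3600 / 25 = 144 chunks, i.e. up to 144 model.generate passes per file.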
 
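For reference, a minimal client for the new asynchronous flow could look like the sketch below. The endpoint path, request body, and 202 response shape come from the diff above; the localhost address and the sample audio URL are placeholders, not part of the commit.

import requests

# Placeholder addresses -- substitute the deployed Space URL and a real audio file.
SPACE_URL = "http://localhost:7860/transcribe"
AUDIO_URL = "https://example.com/sample.wav"

resp = requests.post(SPACE_URL, json={"audio_url": AUDIO_URL})

# The endpoint now answers immediately; the transcription itself is
# delivered later to WEBHOOK_URL as {"Transcription": "..."}.
print(resp.status_code)  # 202
print(resp.json())       # {"status": "Received. Transcription in progress.", ...}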
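To exercise the webhook leg without Make.com, one option is a throwaway local receiver such as the sketch below (hypothetical, not part of this commit); point WEBHOOK_URL at it while testing.

# Hypothetical stand-in for the Make.com webhook, for local testing only.
# Run this and set WEBHOOK_URL = "http://localhost:9000/hook" in app.py.
from flask import Flask, request

hook_app = Flask(__name__)

@hook_app.route("/hook", methods=["POST"])
def hook():
    # app.py posts either {"Transcription": "..."} on success
    # or {"error": "..."} on failure.
    print(request.get_json())
    return "", 200

if __name__ == "__main__":
    hook_app.run(host="0.0.0.0", port=9000)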