EladSpamson committed
Commit 5b89128 · verified · 1 Parent(s): 5faf489

Update app.py

Files changed (1):
  app.py +10 -11
app.py CHANGED
@@ -15,33 +15,32 @@ from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
 app = Flask(__name__)
 
-# Choose a multilingual Whisper model that includes Hebrew.
-# For CPU usage, 'openai/whisper-base' or 'openai/whisper-tiny' are typical.
-model_id = "openai/whisper-base"
+# Use your custom Hebrew Whisper model
+model_id = "ivrit-ai/whisper-large-v3-turbo"
 processor = WhisperProcessor.from_pretrained(model_id)
 model = WhisperForConditionalGeneration.from_pretrained(model_id)
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
-# Force Hebrew to avoid short-audio meltdown with auto-detect
+# Force Hebrew so it won't require 30s audio for language detection
 forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")
 
 def transcribe_audio(audio_url):
-    # 1) Download audio to /tmp
+    # 1) Download audio file to /tmp
     response = requests.get(audio_url)
     audio_path = "/tmp/temp_audio.wav"
     with open(audio_path, "wb") as f:
         f.write(response.content)
 
-    # 2) Load audio with librosa
+    # 2) Load with librosa
     waveform, sr = librosa.load(audio_path, sr=16000)
 
-    # 3) Limit up to 1 hour for stability
+    # 3) (Optional) limit up to 1 hour
     max_sec = 3600
     waveform = waveform[: sr * max_sec]
 
-    # 4) Chunk the audio in 25-second intervals
+    # 4) Split into 25-second chunks
     chunk_sec = 25
     chunk_size = sr * chunk_sec
     chunks = [waveform[i : i + chunk_size] for i in range(0, len(waveform), chunk_size)]
@@ -52,7 +51,7 @@ def transcribe_audio(audio_url):
         inputs = processor(chunk, sampling_rate=sr, return_tensors="pt", padding=True)
         input_features = inputs.input_features.to(device)
 
-        # Force Hebrew to skip auto-detect logic
+        # Force Hebrew, skipping auto-detect
         with torch.no_grad():
             predicted_ids = model.generate(
                 input_features,
@@ -71,10 +70,10 @@ def transcribe_endpoint():
     if not audio_url:
         return jsonify({"error": "Missing 'audio_url' in request"}), 400
 
-    # Perform forced-Hebrew transcription
+    # Perform forced Hebrew transcription
     text = transcribe_audio(audio_url)
 
-    # Return JSON with no ASCII escaping (ensures real Hebrew chars)
+    # Return raw Hebrew in JSON
     payload = {"Transcription": text}
     return Response(
         json.dumps(payload, ensure_ascii=False),
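
For quick manual testing, a client call against this endpoint could look like the minimal sketch below. The route path "/transcribe" and port 7860 are assumptions, since the @app.route decorator and app.run() call fall outside these hunks; the audio_url request field and Transcription response key are taken from the handler above.

import requests

# Minimal client sketch for the endpoint in this commit.
# ASSUMPTIONS: the "/transcribe" route and port 7860 are guesses; the
# @app.route decorator and app.run() call are outside the diff's hunks.
resp = requests.post(
    "http://localhost:7860/transcribe",                    # assumed route/port
    json={"audio_url": "https://example.com/sample.wav"},  # field name from the handler
    timeout=600,  # decoding runs chunk-by-chunk, so long files need a generous timeout
)
resp.raise_for_status()
# The server serializes with ensure_ascii=False, so the Hebrew text arrives un-escaped.
print(resp.json()["Transcription"])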