EladSpamson committed on
Commit
2a0a17e
·
verified ·
1 Parent(s): e35f2fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -16
app.py CHANGED
@@ -21,9 +21,9 @@ def stop():
21
  global stop_processing
22
  stop_processing = True
23
 
24
- def transcribe_3600_seconds(audio_file):
25
  """
26
- Process only the first 30 seconds of the audio, in small 5-second chunks.
27
  Return partial text chunk by chunk (generator).
28
  """
29
  global stop_processing
@@ -32,22 +32,23 @@ def transcribe_3600_seconds(audio_file):
32
  # 1) Load at 16kHz
33
  waveform, sr = librosa.load(audio_file, sr=16000)
34
 
35
- # 2) Truncate to the first 30 seconds
36
- time_limit_s = 6000
37
  if len(waveform) > sr * time_limit_s:
38
  waveform = waveform[: sr * time_limit_s]
39
 
40
- # Also limit if total is over 60 min (safety)
41
  max_audio_sec = 60 * 60
42
  if len(waveform) > sr * max_audio_sec:
43
  waveform = waveform[: sr * max_audio_sec]
44
 
45
- # 3) Split that 30s portion into 5s chunks
46
  chunk_duration_s = 25
47
  chunk_size = sr * chunk_duration_s
48
  chunks = []
49
  for start_idx in range(0, len(waveform), chunk_size):
50
  chunk = waveform[start_idx : start_idx + chunk_size]
 
51
  if len(chunk) < sr * 1:
52
  continue
53
  chunks.append(chunk)
@@ -74,7 +75,7 @@ def transcribe_3600_seconds(audio_file):
74
  predicted_ids = model.generate(
75
  input_features,
76
  attention_mask=attention_mask,
77
- max_new_tokens=444,
78
  do_sample=False,
79
  forced_decoder_ids=forced_decoder_ids
80
  )
@@ -82,15 +83,22 @@ def transcribe_3600_seconds(audio_file):
82
  text_chunk = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
83
  partial_text += text_chunk + "\n"
84
 
 
85
  yield partial_text
86
 
87
- # Build Gradio UI with API support
88
- demo = gr.Interface(
89
- fn=transcribe_3600_seconds,
90
- inputs=gr.Audio(type="filepath"),
91
- outputs="text",
92
- title="Hebrew Whisper API",
93
- api_name="transcribe" # Enables API access
94
- )
 
 
 
 
 
95
 
96
- demo.launch()
 
 
21
  global stop_processing
22
  stop_processing = True
23
 
24
+ def transcribe_audio(audio_file):
25
  """
26
+ Process up to 3600 seconds of the audio (1 hour) in 25-second chunks.
27
  Return partial text chunk by chunk (generator).
28
  """
29
  global stop_processing
 
32
  # 1) Load at 16kHz
33
  waveform, sr = librosa.load(audio_file, sr=16000)
34
 
35
+ # 2) Truncate to the first 3600 seconds (1 hour)
36
+ time_limit_s = 3600
37
  if len(waveform) > sr * time_limit_s:
38
  waveform = waveform[: sr * time_limit_s]
39
 
40
+ # Additional safety limit if total is somehow over 60 minutes
41
  max_audio_sec = 60 * 60
42
  if len(waveform) > sr * max_audio_sec:
43
  waveform = waveform[: sr * max_audio_sec]
44
 
45
+ # 3) Split that audio into 25-second chunks
46
  chunk_duration_s = 25
47
  chunk_size = sr * chunk_duration_s
48
  chunks = []
49
  for start_idx in range(0, len(waveform), chunk_size):
50
  chunk = waveform[start_idx : start_idx + chunk_size]
51
+ # Skip very small final pieces (less than 1 second)
52
  if len(chunk) < sr * 1:
53
  continue
54
  chunks.append(chunk)
 
75
  predicted_ids = model.generate(
76
  input_features,
77
  attention_mask=attention_mask,
78
+ max_new_tokens=444, # keep under total token limit
79
  do_sample=False,
80
  forced_decoder_ids=forced_decoder_ids
81
  )
 
83
  text_chunk = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
84
  partial_text += text_chunk + "\n"
85
 
86
+ # Stream updated partial text
87
  yield partial_text
88
 
89
+ # Build Gradio UI
90
+ with gr.Blocks() as demo:
91
+ gr.Markdown("## Hebrew Whisper (up to 1 hour, 25-second chunks)")
92
+
93
+ audio_input = gr.Audio(type="filepath", label="Upload Audio (Truncate to 1 hour)")
94
+ output_text = gr.Textbox(label="Partial Transcription")
95
+
96
+ start_btn = gr.Button("Start Transcription")
97
+ stop_btn = gr.Button("Stop Processing", variant="stop")
98
+
99
+ # Stream chunk-by-chunk without a progress bar
100
+ start_btn.click(transcribe_audio, inputs=audio_input, outputs=output_text)
101
+ stop_btn.click(stop)
102
 
103
+ # Enable API by setting enable_api=True
104
+ demo.launch(enable_api=True)