EladSpamson committed on
Commit
e2ba5da
·
verified ·
1 Parent(s): 6b81368

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -44
app.py CHANGED
@@ -13,75 +13,40 @@ model.to(device)
13
  # Force Hebrew transcription
14
  forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")
15
 
16
# Module-level flag polled by transcribe_audio between chunks; flipped to
# True by the Stop button's callback.
stop_processing = False

def stop():
    """Request that any in-progress transcription halt at the next chunk."""
    global stop_processing
    stop_processing = True
21
def transcribe_audio(audio_file):
    """Transcribe an audio file to Hebrew text, streaming partial results.

    Loads the audio at 16 kHz, caps it at 3 hours, splits it into
    25-second chunks, runs Whisper on each chunk, and yields the
    accumulated transcript after every chunk so the UI updates
    incrementally. Yields a notice and returns early if the user
    pressed Stop.

    Args:
        audio_file: Filesystem path to the uploaded audio file.

    Yields:
        str: The transcript accumulated so far (one line per chunk),
        or a stop notice if processing was cancelled.
    """
    global stop_processing
    stop_processing = False

    waveform, sr = librosa.load(audio_file, sr=16000)

    # Hard cap input length at 3 hours. (The original performed this exact
    # truncation twice with two differently named constants; the duplicate
    # has been merged into this single check.)
    max_audio_sec = 10800
    if len(waveform) > sr * max_audio_sec:
        waveform = waveform[: sr * max_audio_sec]

    # Split into 25-second chunks, dropping trailing fragments shorter than
    # one second — they carry too little signal to transcribe reliably.
    chunk_duration_s = 25
    chunk_size = sr * chunk_duration_s
    chunks = []
    for start_idx in range(0, len(waveform), chunk_size):
        chunk = waveform[start_idx : start_idx + chunk_size]
        if len(chunk) >= sr:
            chunks.append(chunk)

    partial_text = ""
    for chunk in chunks:
        # Honor a Stop request between chunks (generation itself is not
        # interruptible mid-chunk).
        if stop_processing:
            yield "⚠️ Stopped by User ⚠️"
            return

        # Request the attention mask explicitly so generate() does not have
        # to infer padding from the pad token.
        inputs = processor(
            chunk,
            sampling_rate=16000,
            return_tensors="pt",
            padding="longest",
            return_attention_mask=True,
        )
        input_features = inputs.input_features.to(device)
        attention_mask = inputs.attention_mask.to(device)

        with torch.no_grad():
            predicted_ids = model.generate(
                input_features,
                attention_mask=attention_mask,
                max_new_tokens=444,
                do_sample=False,  # deterministic (greedy) decoding
                forced_decoder_ids=forced_decoder_ids,
            )

        text_chunk = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        partial_text += text_chunk + "\n"

        yield partial_text
74
 
75
# Gradio UI: upload audio, stream partial transcript, allow cancellation.
with gr.Blocks() as demo:
    # Fixed stray ")" that was inside the heading string.
    gr.Markdown("## Exceedea Transcription")

    audio_input = gr.Audio(type="filepath", label="Upload Audio (Truncate to 1 hour)")
    output_text = gr.Textbox(label="Partial Transcription")

    start_btn = gr.Button("Start Transcription")
    stop_btn = gr.Button("Stop Processing", variant="stop")

    # transcribe_audio is a generator, so the textbox streams partial results.
    start_btn.click(transcribe_audio, inputs=audio_input, outputs=output_text)
    stop_btn.click(stop)

demo.launch()
 
13
  # Force Hebrew transcription
14
  forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")
15
 
 
 
 
 
 
16
def transcribe_audio(audio_file):
    """Transcribe an audio file to Hebrew text and return the full transcript.

    Loads the audio at 16 kHz, truncates it to the recommended 1-hour
    limit, splits it into 25-second chunks, runs Whisper on each chunk,
    and concatenates the decoded text (one line per chunk).

    Args:
        audio_file: Filesystem path to the uploaded audio file.

    Returns:
        str: The complete transcript ("" for empty/too-short input).
    """
    waveform, sr = librosa.load(audio_file, sr=16000)

    # Recommended 1-hour cap for stability; slicing is a no-op when shorter.
    max_audio_sec = 3600
    waveform = waveform[: sr * max_audio_sec]

    # Split into 25-second chunks, skipping trailing fragments shorter than
    # one second — they carry too little signal to transcribe reliably.
    chunk_duration_s = 25
    chunk_size = sr * chunk_duration_s
    chunks = [
        waveform[i : i + chunk_size]
        for i in range(0, len(waveform), chunk_size)
        if len(waveform[i : i + chunk_size]) >= sr
    ]

    partial_text = ""
    for chunk in chunks:
        # Request the attention mask explicitly (padding="longest") so
        # generate() does not have to infer padding from the pad token.
        inputs = processor(
            chunk,
            sampling_rate=16000,
            return_tensors="pt",
            padding="longest",
            return_attention_mask=True,
        )
        input_features = inputs.input_features.to(device)
        attention_mask = inputs.attention_mask.to(device)

        with torch.no_grad():
            predicted_ids = model.generate(
                input_features,
                attention_mask=attention_mask,
                max_new_tokens=444,
                forced_decoder_ids=forced_decoder_ids,
            )

        text_chunk = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        partial_text += text_chunk + "\n"

    return partial_text  # no yield, just the final result
41
 
42
# Gradio UI: upload audio, show the completed transcript when done.
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("## Exceedea Transcription")

    # Input: path to the uploaded audio file (users asked to keep it under 1 hour).
    uploaded_audio = gr.Audio(type="filepath", label="Upload Audio (Truncate to 1 hour)")
    # Output: the complete transcript once processing finishes.
    transcript_box = gr.Textbox(label="Full Transcription")

    transcribe_btn = gr.Button("Start Transcription")
    transcribe_btn.click(transcribe_audio, inputs=uploaded_audio, outputs=transcript_box)

demo.launch()