Exceedea

Runtime error

App Files Files Community

EladSpamson committed on Feb 22

Commit

ba4bba3

verified ·

1 Parent(s): 8be8710

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -29

app.py CHANGED Viewed

@@ -10,50 +10,86 @@ model = WhisperForConditionalGeneration.from_pretrained(model_id)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
-forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")
 def transcribe_audio(audio_file):
-    """Process only the first 30 seconds of an audio file and return text."""
     waveform, sr = librosa.load(audio_file, sr=16000)
-    # Limit to first 30 seconds
-    time_limit_s = 30
     if len(waveform) > sr * time_limit_s:
         waveform = waveform[: sr * time_limit_s]
-    # Preprocess
-    inputs = processor(
-        waveform,
-        sampling_rate=16000,
-        return_tensors="pt",
-        padding="longest",
-        return_attention_mask=True
-    )
-    input_features = inputs.input_features.to(device)
-    attention_mask = inputs.attention_mask.to(device)
-    # Transcribe
-    with torch.no_grad():
-        predicted_ids = model.generate(
-            input_features,
-            attention_mask=attention_mask,
-            max_new_tokens=444,
-            do_sample=False,
-            forced_decoder_ids=forced_decoder_ids
         )
-    # Decode and return text
-    text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-    return text
-# Expose API endpoint for Make.com
 demo = gr.Interface(
     fn=transcribe_audio,
     inputs=gr.Audio(type="filepath"),
     outputs="text",
     title="Hebrew Whisper API",
-    api_name="transcribe"  # This enables API access
 )
-# Run on Hugging Face Spaces
 demo.launch()

 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
+# Force Hebrew transcription
+forced_decoder_ids = processor.get_decoder_prompt_ids(
+    language="he",
+    task="transcribe"
+)
+stop_processing = False
+def stop():
+    global stop_processing
+    stop_processing = True
 def transcribe_audio(audio_file):
+    """
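+    Transcribe up to the first 60 minutes of the audio, in 25-second chunks
+    (a chunk shorter than 1 second is skipped).
+    Return the full transcription as a single string, one line per chunk,
+    or the "Stopped by User" message if the stop flag is set mid-run.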
+    """
+    global stop_processing
+    stop_processing = False
+    # Load at 16kHz
     waveform, sr = librosa.load(audio_file, sr=16000)
+    # Truncate to the first 6000 seconds (100 minutes)
+    time_limit_s = 6000
     if len(waveform) > sr * time_limit_s:
         waveform = waveform[: sr * time_limit_s]
+    # Also limit if total is over 60 min (safety)
+    max_audio_sec = 60 * 60
+    if len(waveform) > sr * max_audio_sec:
+        waveform = waveform[: sr * max_audio_sec]
+    # Split into 25s chunks
+    chunk_duration_s = 25
+    chunk_size = sr * chunk_duration_s
+    chunks = []
+    for start_idx in range(0, len(waveform), chunk_size):
+        chunk = waveform[start_idx : start_idx + chunk_size]
+        if len(chunk) < sr * 1:
+            continue
+        chunks.append(chunk)
+    partial_text = ""
+    # Transcribe chunk by chunk
+    for chunk in chunks:
+        if stop_processing:
+            return "⚠️ Stopped by User ⚠️"
+        inputs = processor(
+            chunk,
+            sampling_rate=16000,
+            return_tensors="pt",
+            padding="longest",
+            return_attention_mask=True
         )
+        input_features = inputs.input_features.to(device)
+        attention_mask = inputs.attention_mask.to(device)
+        with torch.no_grad():
+            predicted_ids = model.generate(
+                input_features,
+                attention_mask=attention_mask,
+                max_new_tokens=444,
+                do_sample=False,
+                forced_decoder_ids=forced_decoder_ids
+            )
+        text_chunk = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+        partial_text += text_chunk + "\n"
+    return partial_text.strip()
+# Build Gradio UI with API support
 demo = gr.Interface(
     fn=transcribe_audio,
     inputs=gr.Audio(type="filepath"),
     outputs="text",
     title="Hebrew Whisper API",
+    api_name="transcribe"  # Enables API access for Make.com
 )
 demo.launch()