EladSpamson committed
Commit 0348b75 · verified · 1 parent: 67f033c

Update app.py

Files changed (1):
  app.py  +31 -10
app.py CHANGED
@@ -13,12 +13,23 @@ model = WhisperForConditionalGeneration.from_pretrained(model_id)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
-# Function to process long audio in ~3-5 min chunks
+# Global variable to control stopping
+stop_processing = False
+
+# Function to stop transcription
+def stop():
+    global stop_processing
+    stop_processing = True # This will break transcription
+
+# Function to process long audio in chunks
 def transcribe(audio):
+    global stop_processing
+    stop_processing = False # Reset stop flag when new transcription starts
+
     # Load the audio file and convert to 16kHz
     waveform, sr = librosa.load(audio, sr=16000)
 
-    # Set chunk size (~3-5 minutes per chunk)
+    # Set chunk size (~4 min per chunk)
     chunk_duration = 4 * 60 # 4 minutes (240 seconds)
     max_audio_length = 60 * 60 # 60 minutes
     chunks = []
@@ -29,6 +40,9 @@ def transcribe(audio):
 
     # Split audio into ~4-minute chunks
     for i in range(0, len(waveform), sr * chunk_duration):
+        if stop_processing:
+            return "⚠️ Transcription Stopped by User ⚠️"
+
         chunk = waveform[i : i + sr * chunk_duration]
         if len(chunk) < sr * 2: # Skip chunks shorter than 2 seconds
             continue
@@ -37,8 +51,11 @@ def transcribe(audio):
     # Process each chunk and transcribe
     transcriptions = []
     for chunk in chunks:
+        if stop_processing:
+            return "⚠️ Transcription Stopped by User ⚠️"
+
         input_features = processor(chunk, sampling_rate=16000, return_tensors="pt").input_features.to(device)
-
+
         with torch.no_grad():
             predicted_ids = model.generate(
                 input_features,
@@ -57,13 +74,17 @@ def transcribe(audio):
     return full_transcription
 
 # Create the Gradio Interface
-iface = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Audio(type="filepath"), # Fixed input format
-    outputs="text",
-    title="Hebrew Speech-to-Text (Whisper)",
-    description="Upload a Hebrew audio file (up to 60 minutes) for full transcription.",
-)
+with gr.Blocks() as iface:
+    gr.Markdown("# Hebrew Speech-to-Text (Whisper)")
+
+    audio_input = gr.Audio(type="filepath", label="Upload Hebrew Audio")
+    output_text = gr.Textbox(label="Transcription Output")
+
+    start_btn = gr.Button("Start Transcription")
+    stop_btn = gr.Button("Stop Processing", variant="stop")
+
+    start_btn.click(transcribe, inputs=audio_input, outputs=output_text)
+    stop_btn.click(stop) # Calls the stop function when clicked
 
 # Launch the Gradio app
 iface.launch()
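
The core of this change is a cooperative-stop pattern: a module-level flag that stop() sets, and that transcribe() resets on entry and then polls at each chunk boundary. The sketch below isolates that pattern so it can be run on its own, without the Whisper or Gradio dependencies; transcribe_like, process_chunk, and the synthetic waveform are illustrative stand-ins and are not part of app.py.

# Minimal, dependency-light sketch of the stop-flag pattern from this commit.
# The chunk arithmetic mirrors app.py (16 kHz audio, ~4-minute chunks);
# process_chunk is a hypothetical stand-in for the Whisper processor/generate call.

import numpy as np

stop_processing = False  # module-level flag, same role as in app.py

def stop():
    """Request cancellation; the worker notices it at the next chunk boundary."""
    global stop_processing
    stop_processing = True

def process_chunk(chunk, sr=16000):
    # Stand-in for processor(...) + model.generate(...); just reports chunk length.
    return f"[{len(chunk) / sr:.0f}s chunk]"

def transcribe_like(waveform, sr=16000, chunk_duration=4 * 60):
    """Walk the waveform in ~4-minute chunks, honoring the stop flag."""
    global stop_processing
    stop_processing = False  # reset so a previous stop does not cancel this run

    results = []
    for i in range(0, len(waveform), sr * chunk_duration):
        if stop_processing:
            return "⚠️ Transcription Stopped by User ⚠️"
        chunk = waveform[i : i + sr * chunk_duration]
        if len(chunk) < sr * 2:  # skip chunks shorter than 2 seconds
            continue
        results.append(process_chunk(chunk, sr))
    return " ".join(results)

if __name__ == "__main__":
    fake_audio = np.zeros(16000 * 60 * 9)  # 9 minutes of silence at 16 kHz
    print(transcribe_like(fake_audio))

Because the flag is only consulted between chunks, pressing Stop takes effect once the chunk currently being processed finishes rather than interrupting model.generate mid-call.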