tusker123 committed on
Commit
dcdec88
·
verified ·
1 Parent(s): 344bd2d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +570 -0
app.py ADDED
@@ -0,0 +1,570 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
3
+ import requests
4
+ import os
5
+ from moviepy.editor import VideoFileClip
6
+ import tempfile
7
+ import re
8
+ from urllib.parse import urlparse
9
+ from gradio import Progress
10
+ from pathlib import Path
11
+ import torch
12
+ import shutil # Import shutil for explicit temporary directory cleanup
13
+ import soundfile as sf # Import soundfile for explicit audio loading
14
+
15
# Accent classifier: an audio-classification pipeline trained on English accents.
pipe = pipeline(
    "audio-classification",
    model="dima806/english_accents_classification",
)

# Text classifier used to detect the language of the transcribed speech.
language_detector = pipeline(
    "text-classification",
    model="alexneakameni/language_detection",
)

# Device index handed to the ASR pipeline: 0 selects the first CUDA GPU when
# one is available, -1 means "stay on CPU".
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Small English-only Whisper checkpoint. Its only job is to turn a short audio
# clip into text so that the language detector has something to work with.
asr_model_id = "openai/whisper-tiny.en"
asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(asr_model_id)
asr_processor = AutoProcessor.from_pretrained(asr_model_id)
asr_pipe = pipeline(
    "automatic-speech-recognition",
    model=asr_model,
    tokenizer=asr_processor.tokenizer,
    feature_extractor=asr_processor.feature_extractor,
    device=device,
)
37
+
38
def is_valid_url(url):
    """
    Checks if the given URL is an acceptable video source.

    A URL is accepted when it uses http or https, has a host, and either:
      * its path ends with ``.mp4`` (on any host, including Dropbox), or
      * it is hosted on ``loom.com`` or one of its subdomains, or
      * it is hosted on ``drive.google.com`` and its path contains
        ``/file/d/`` or ``/uc``.

    Hosts are compared against the parsed hostname (exact match or a real
    subdomain) instead of by substring, so look-alike hosts such as
    ``notloom.com`` or ``loom.com.example.org`` are not mistaken for Loom.

    Args:
        url (str): The URL to validate.
    Returns:
        bool: True if the URL is valid and allowed, False otherwise.
    """
    if not url:
        return False

    try:
        parsed = urlparse(url)
        # .hostname strips any port and user-info and is already lower-cased.
        host = parsed.hostname or ""
        scheme = parsed.scheme
        path = parsed.path
    except (ValueError, TypeError, AttributeError):
        # Malformed URLs (e.g. bad IPv6 literals) or non-string input.
        return False

    if scheme not in ("http", "https") or not host:
        return False

    def hosted_on(domain):
        # True for the domain itself or any of its subdomains.
        return host == domain or host.endswith("." + domain)

    # Direct .mp4 links are accepted from every host.
    if path.lower().endswith(".mp4"):
        return True

    if hosted_on("drive.google.com"):
        # Typical Google Drive patterns for shared files or download links.
        return "/file/d/" in path or "/uc" in path

    # Loom share pages are allowed even though they do not end in .mp4.
    return hosted_on("loom.com")
82
+
83
def is_valid_file(file_obj):
    """
    Checks if the uploaded file refers to a supported video file format.

    Only the file extension is inspected; the file content is not opened.

    Args:
        file_obj: The uploaded file. Either an object exposing the file path
            through a ``name`` attribute (as Gradio file objects do), or the
            path itself as a ``str`` / ``pathlib.Path``.
    Returns:
        bool: True if the extension is .mp4, .mov, .avi or .mkv
            (case-insensitive), False otherwise — including for empty or
            non-path input.
    """
    if not file_obj:
        return False

    # Prefer the ``name`` attribute; fall back to treating the value as a path.
    file_path = getattr(file_obj, "name", file_obj)
    try:
        suffix = Path(file_path).suffix
    except TypeError:
        # Not something that can be interpreted as a filesystem path.
        return False

    return suffix.lower() in ('.mp4', '.mov', '.avi', '.mkv')
97
+
98
def download_file(url, save_path, progress=Progress()):
    """
    Downloads a video file from a given URL to a specified path.

    The response is streamed to disk in 8 KiB chunks and is always closed,
    even when the download fails part-way. A connect/read timeout is applied
    so that an unresponsive server cannot block the request indefinitely.

    Args:
        url (str): The URL of the video to download.
        save_path (str): The local path to save the downloaded video.
        progress (gradio.Progress): Gradio progress tracker for UI updates.
    Raises:
        ValueError: If the URL does not pass is_valid_url.
        ConnectionError: If the server answers with a status other than 200.
        requests.RequestException: For network-level failures and timeouts.
    """
    if not is_valid_url(url):
        raise ValueError("Invalid URL. Only .mp4 files or Loom videos are accepted.")

    # (connect timeout, read timeout) in seconds; the read timeout applies to
    # each wait for data, not to the download as a whole.
    with requests.get(url, stream=True, timeout=(10, 60)) as response:
        if response.status_code != 200:
            raise ConnectionError(f"Failed to download video (HTTP {response.status_code})")

        # Total size is only used for the progress bar; treat a missing or
        # malformed Content-Length header as "unknown".
        try:
            total_size = int(response.headers.get('content-length', 0))
        except (TypeError, ValueError):
            total_size = 0
        downloaded = 0

        with open(save_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if not chunk:  # Filter out keep-alive chunks
                    continue
                f.write(chunk)
                downloaded += len(chunk)
                if total_size > 0:
                    # Clamp: the decoded body can be larger than Content-Length.
                    progress(min(downloaded / total_size, 1.0), desc="📥 Downloading video...")
                else:
                    progress(0, desc="📥 Downloading video (size unknown)...")
131
+
132
def extract_audio_full(video_path, progress=Progress()):
    """
    Extracts the full duration of audio from a video file and saves it as a WAV file.

    The WAV file is created with tempfile.NamedTemporaryFile(delete=False) so
    it outlives this function and can be handed to the UI for playback. If
    extraction fails, the partially written file is removed again.

    Args:
        video_path (str): Path to the input video file.
        progress (gradio.Progress): Gradio progress tracker for UI updates.
    Returns:
        str: The path to the extracted audio file (16 kHz WAV).
    Raises:
        Exception: If the video cannot be opened, has no audio track, or the
            audio cannot be written. The original error is chained as the cause.
    """
    video = None
    audio_clip = None
    audio_path = None
    try:
        progress(0, desc="🔊 Extracting full audio for playback...")
        video = VideoFileClip(video_path)

        audio_clip = video.audio
        if audio_clip is None:
            raise ValueError("the video has no audio track")

        # Reserve a persistent temp path, then close the handle immediately so
        # moviepy can write to that path itself.
        temp_audio_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        audio_path = temp_audio_file.name
        temp_audio_file.close()

        audio_clip.write_audiofile(audio_path, fps=16000, logger=None)
        progress(1.0)
        return audio_path
    except Exception as e:
        # Do not leave a stray temp file behind when extraction fails.
        if audio_path and os.path.exists(audio_path):
            try:
                os.remove(audio_path)
            except OSError:
                pass  # best-effort cleanup; the extraction error matters more
        raise Exception(f"Full audio extraction failed: {str(e)}") from e
    finally:
        # Release the clips on success and on failure alike.
        for clip in (video, audio_clip):
            if clip is not None:
                clip.close()
159
+
160
def extract_audio_clip(video_path, audio_path, duration, progress=Progress()):
    """
    Extracts a specified duration of audio from a video file and saves it as a WAV file.

    The clip always starts at the beginning of the video and is shortened to
    the video's own duration when the requested duration is longer.

    Args:
        video_path (str): Path to the input video file.
        audio_path (str): Path to save the extracted audio WAV file.
        duration (int): The duration of audio to extract in seconds.
        progress (gradio.Progress): Gradio progress tracker for UI updates.
    Returns:
        str: The path to the extracted audio file (same as audio_path).
    Raises:
        Exception: If the video cannot be opened, has no audio track, or the
            audio cannot be written. The original error is chained as the cause.
    """
    video = None
    audio_clip = None
    try:
        progress(0, desc=f"🔊 Extracting {duration} seconds of audio for analysis...")
        video = VideoFileClip(video_path)

        if video.audio is None:
            raise ValueError("the video has no audio track")

        # Ensure the subclip duration does not exceed the video's actual duration
        clip_duration = min(duration, video.duration)
        audio_clip = video.audio.subclip(0, clip_duration)
        audio_clip.write_audiofile(audio_path, fps=16000, logger=None)
        progress(1.0)
        return audio_path
    except Exception as e:
        raise Exception(f"Audio clip extraction failed: {str(e)}") from e
    finally:
        # Release the clips on success and on failure alike.
        for clip in (video, audio_clip):
            if clip is not None:
                clip.close()
184
+
185
def transcribe_audio(audio_path_clip, progress=Progress()):
    """
    Turns a short audio clip into text with the module-level ASR model.

    This is a best-effort step: any failure is printed and reported to the
    caller as an empty string instead of an exception.

    Args:
        audio_path_clip (str): Path to the short audio clip.
        progress (gradio.Progress): Gradio progress tracker for UI updates.
    Returns:
        str: The transcribed text, or "" if transcription failed.
    """
    try:
        progress(0, desc="📝 Transcribing audio for language detection...")

        # Read the raw samples and their sampling rate from disk.
        waveform, sample_rate = sf.read(audio_path_clip)

        # Multi-channel audio is averaged down to a single channel.
        if waveform.ndim > 1:
            waveform = waveform.mean(axis=1)

        # Let the processor build the model inputs as PyTorch tensors.
        features = asr_processor(waveform, sampling_rate=sample_rate, return_tensors="pt")

        # Inputs follow the configured device when it is not the CPU (-1).
        if device != -1:
            features = {name: tensor.to(device) for name, tensor in features.items()}

        # Inference only: no gradients needed. 128 new tokens caps the length
        # of the generated transcript.
        with torch.no_grad():
            token_ids = asr_model.generate(**features, max_new_tokens=128)

        decoded = asr_processor.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
        transcript = decoded[0]

        progress(1.0)
        return transcript
    except Exception as e:
        print(f"Transcription failed: {e}")
        return ""
224
+
225
def classify_audio(audio_path, progress=Progress()):
    """
    Runs the module-level accent classification pipeline on an audio file.

    Args:
        audio_path (str): Path to the input audio file.
        progress (gradio.Progress): Gradio progress tracker for UI updates.
    Returns:
        The pipeline output; callers treat it as a list of dicts with
        'label' and 'score' keys.
    Raises:
        Exception: If the pipeline (or the progress update) fails, with the
            message prefixed by "Classification failed: ".
    """
    try:
        progress(0, desc="🔍 Analyzing accent - please be patient...")
        predictions = pipe(audio_path)
        progress(1.0)  # Mark completion
    except Exception as e:
        raise Exception(f"Classification failed: {str(e)}")
    else:
        return predictions
240
+
241
def _build_accent_results_html(result):
    """
    Renders accent predictions as a headline plus a ranked HTML table.

    Args:
        result (list): Non-empty list of dicts with 'label' and 'score' keys.
            The first entry is presented as the predicted accent and its row
            is highlighted.
    Returns:
        str: HTML fragment ready for display.
    """
    from html import escape

    header_cell = "text-align:left; padding: 8px; font-size: 1.1em; color: #333; width: auto;"
    table_open = f"""
    <table style='width: fit-content; max-width: 100%; border-collapse: collapse; font-family: Arial, sans-serif; margin-top: 1em;'>
    <thead>
    <tr style='border-bottom: 2px solid #4CAF50; background-color: #f2f2f2;'>
    <th style='{header_cell} min-width: 50px;'>Rank</th>
    <th style='{header_cell} min-width: 100px;'>Accent</th>
    <th style='{header_cell} min-width: 180px;'>Confidence (%)</th>
    <th style='{header_cell} min-width: 80px;'>Score</th>
    </tr>
    </thead>
    <tbody>
    """

    rows = []
    for i, r in enumerate(result):
        # Labels come from the model config; escape them before embedding in HTML.
        label = escape(str(r['label']).capitalize())
        score = r['score']
        if i == 0:
            # Highlight the top prediction.
            row_style = "background-color:#d4edda; font-weight: bold; color: #155724;"
            border_color = "#c3e6cb"
        else:
            row_style = "color: #333;"
            border_color = "#ddd"
        cell = f"padding: 8px; border-bottom: 1px solid {border_color}; width: auto;"
        rows.append(f"""
        <tr style='{row_style}'>
        <td style='{cell} min-width: 50px;'>#{i + 1}</td>
        <td style='{cell} min-width: 100px;'>{label}</td>
        <td style='{cell} min-width: 180px;'>
        <div style='display: flex; align-items: center;'>
        <span style='width: auto; display: inline-block;'>{score * 100:.2f}%</span>
        <progress value='{score * 100}' max='100' style='width: 100%; margin-left: 10px;'></progress>
        </div>
        </td>
        <td style='{cell} min-width: 80px;'>
        <span style='width: auto; display: inline-block;'>{score:.4f}</span>
        </td>
        </tr>
        """)

    table = table_open + "".join(rows) + "</tbody></table>"

    top_result = result[0]
    top_label = escape(str(top_result['label']).capitalize())
    return f"""
    <div style='font-family: Arial, sans-serif;'>
    <h2 style='color: #2E7D32; margin-bottom: 0.5em;'>
    🎤 Predicted Accent: <span style='font-weight:bold'>{top_label}</span>
    <span style='font-size: 0.8em; color: #555; font-weight: normal;'>
    (Confidence: {top_result['score']*100:.2f}%)
    </span>
    </h2>
    {table}
    </div>
    """


def process_video_unified(video_source, analysis_duration, progress=Progress()):
    """
    Processes either a video URL or an uploaded video file to classify accent.

    Steps: fetch/copy the video into a private temp directory, extract the
    full audio for playback, transcribe the first 15 seconds, check that the
    transcript is detected as English, then classify the accent on the first
    ``analysis_duration`` seconds. Classification is skipped when the detected
    language is not English (or its confidence is 0.7 or lower); it still runs,
    with a warning, when nothing could be transcribed.

    This function does not raise: every failure is converted into an error
    HTML message and ``error_flag=True``.

    Args:
        video_source (str or gr.File): The input, either a URL string or a Gradio File object.
        analysis_duration (int): The duration of audio to analyze for accent classification in seconds.
        progress (gradio.Progress): Gradio progress tracker for UI updates.
    Returns:
        tuple: (language_status_html, html_output, audio_path, error_flag)
            language_status_html (str): HTML string displaying language detection status.
            html_output (str): HTML string displaying accent results or error.
            audio_path (str or None): Path to extracted full audio if available, else None.
            error_flag (bool): True if an error occurred, False otherwise.
    """
    from html import escape

    temp_dir = None
    full_audio_path = None
    try:
        # Private directory for intermediate files (video, clipped audio).
        temp_dir = tempfile.mkdtemp()
        video_path = os.path.join(temp_dir, "video.mp4")

        # Determine if input is a URL string or an uploaded Gradio File object
        if isinstance(video_source, str) and video_source.startswith(('http://', 'https://')):
            if not is_valid_url(video_source):
                raise ValueError("Invalid URL. Only .mp4 files or Loom videos are accepted.")
            download_file(video_source, video_path, progress)
        elif hasattr(video_source, 'name'):
            if not is_valid_file(video_source):
                raise ValueError("Invalid file format. Please upload a video file (MP4)")
            # Copy without loading the whole upload into memory.
            shutil.copyfile(video_source.name, video_path)
        else:
            raise ValueError("Unsupported input type. Please provide a video URL or upload a file.")

        # Verify that the video file exists after download/upload
        if not os.path.exists(video_path):
            raise Exception("Video processing failed: Video file not found after download/upload.")

        # Full audio for playback; lives outside temp_dir so it survives cleanup.
        full_audio_path = extract_audio_full(video_path, progress)

        # Short clip for transcription and language detection (first 15 seconds).
        transcription_clip_duration = 15
        audio_for_transcription_path = os.path.join(temp_dir, "audio_for_transcription.wav")
        extract_audio_clip(video_path, audio_for_transcription_path, transcription_clip_duration, progress)

        if not os.path.exists(full_audio_path):
            raise Exception("Audio extraction failed: Full audio file not found.")
        if not os.path.exists(audio_for_transcription_path):
            raise Exception("Audio extraction failed: Clipped audio for transcription not found.")

        # Transcribe the short audio clip
        transcribed_text = transcribe_audio(audio_for_transcription_path, progress)
        if not transcribed_text.strip():
            # Without text the language cannot be checked; warn and carry on
            # with accent classification anyway.
            language_status_html = "<p style='color: orange; font-weight: bold;'>⚠️ Could not transcribe audio for language detection. Please ensure audio is clear.</p>"
        else:
            lang_detection_result = language_detector(transcribed_text)
            detected_language = lang_detection_result[0]['label']
            lang_confidence = lang_detection_result[0]['score']

            # Accept either label spelling for English, with reasonable confidence.
            is_english = detected_language.lower() in ('english', 'eng_latn')
            if is_english and lang_confidence > 0.7:
                language_status_html = f"<p style='color: green; font-weight: bold;'>✅ Verified English Language (Confidence: {lang_confidence*100:.2f}%)</p>"
            else:
                language_status_html = f"<p style='color: red; font-weight: bold;'>⚠️ Detected language: {escape(detected_language.capitalize())} (Confidence: {lang_confidence*100:.2f}%). Please provide English audio for accent classification.</p>"
                # Not English: stop here but still hand back the audio for playback.
                return language_status_html, "", full_audio_path, True

        # Extract audio clip for accent classification (based on analysis_duration slider)
        audio_for_classification_path = os.path.join(temp_dir, "audio_for_classification.wav")
        extract_audio_clip(video_path, audio_for_classification_path, analysis_duration, progress)

        if not os.path.exists(audio_for_classification_path):
            raise Exception("Audio extraction failed: Clipped audio for classification not found.")

        # Classify the extracted audio for accent
        result = classify_audio(audio_for_classification_path, progress)

        if not result:
            return language_status_html, "<p style='color: red; font-weight: bold;'>⚠️ No accent prediction returned</p>", full_audio_path, True

        html_output = _build_accent_results_html(result)

        # Return language status, accent results HTML, full audio path, and no error flag
        return language_status_html, html_output, full_audio_path, False

    except Exception as e:
        # The audio path is not returned on this branch, so nothing else would
        # ever clean up the persistent temp file: remove it here.
        if full_audio_path and os.path.exists(full_audio_path):
            try:
                os.remove(full_audio_path)
            except OSError:
                pass  # best-effort cleanup; report the original error instead
        # Exception text may contain arbitrary characters; escape it for HTML.
        return "", f"<p style='color: red; font-weight: bold;'>⚠️ Error: {escape(str(e))}</p>", None, True
    finally:
        # Always remove the intermediate files; never let cleanup mask the result.
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir, ignore_errors=True)
406
+
407
+
408
# Custom Gradio theme: start from the default theme and override a handful of
# colours. The overrides are collected in a mapping and applied in one call.
_theme_overrides = {
    # Page background is light grey; inner blocks are white.
    "background_fill_primary": "#f0f2f5",
    "background_fill_secondary": "#ffffff",
    # Subtle border for a cleaner look.
    "border_color_primary": "#e0e0e0",
    # Primary buttons: green, lighter green on hover, white text.
    "button_primary_background_fill": "#4CAF50",
    "button_primary_background_fill_hover": "#66BB6A",
    "button_primary_text_color": "#ffffff",
    # Secondary buttons share the same green palette.
    "button_secondary_background_fill": "#4CAF50",
    "button_secondary_background_fill_hover": "#66BB6A",
    "button_secondary_text_color": "#ffffff",
    # Blue accent (and a softer variant) for sliders and similar elements.
    "color_accent": "#2196F3",
    "color_accent_soft": "#BBDEFB",
}
my_theme = gr.themes.Default().set(**_theme_overrides)
430
+
431
+
432
# Gradio app interface definition: inputs (URL or upload, analysis duration),
# status widgets, result panels, and the two button callbacks.
with gr.Blocks(theme=my_theme) as app:  # Apply the custom theme here
    gr.Markdown("""
    <div style='font-family: Arial, sans-serif;'>
    <h1 style='color: #2E7D32;'>🎤 English Accent Classifier</h1>
    <p>Analyze English accents from either:</p>
    <ul>
    <li>A video URL (MP4 or Loom videos)</li>
    <li>Or upload a video file from your computer</li>
    </ul>
    <p>The accent analysis will be performed on the first <strong>60 seconds</strong> of audio by default, after language detection.</p>
    <p>The analysis may take some time depending on the video size and your chosen analysis duration. Please be patient while we process your video.</p>
    <p><strong>Supported file formats:</strong> MP4 </p>
    <p style='font-size: 0.9em; color: #666;'>
    <strong>Note:</strong> This application requires <a href='https://ffmpeg.org/download.html' target='_blank' style='color: #2E7D32;'>FFmpeg</a> to be installed on your system to process video and audio files.
    </p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            url_input = gr.Textbox(
                label="🔗 Video URL (MP4 or Loom)",
                placeholder="Paste URL here..."
            )
            video_input = gr.File(
                label="📁 Upload Video File",
                file_types=["video"],
                interactive=True
            )
        with gr.Column(scale=1):
            analysis_duration = gr.Slider(
                minimum=5,
                maximum=120,
                step=5,
                value=60,
                label="Accent Analysis Duration (seconds)",
                info="Analyze the first N seconds of audio for accent classification."
            )
            with gr.Row():
                submit_btn = gr.Button("Analyze Video", variant="primary")
                clear_btn = gr.Button("Clear Input")

    status_box = gr.Textbox(
        label="Status",
        placeholder="Waiting for video input...",
        interactive=False,
        visible=True
    )
    progress_bar = gr.Slider(
        visible=False,
        label="Processing Progress",
        interactive=False
    )

    # Outputs live in their own row so they stack vertically on small screens
    # and sit side by side on larger ones.
    with gr.Row():
        # Language status and audio player
        with gr.Column(scale=1, min_width=300):
            language_status_html = gr.HTML(label="Language Detection Status", visible=True)
            audio_player = gr.Audio(label="Extracted Audio (Full Duration)", visible=True)
        # Main results table and error output
        with gr.Column(scale=2, min_width=400):
            output_html = gr.HTML()
            error_output = gr.HTML(visible=False)

    def unified_processing_fn(video_url, video_file, analysis_duration, progress=Progress()):
        """
        Click handler for "Analyze Video".

        Yields twice: first a "processing started" state that clears previous
        results, then the final state (success or failure). Each yielded tuple
        matches the outputs wired up in submit_btn.click:
        (status_box, progress_bar, language_status_html, output_html,
        audio_player, error_output).

        A non-blank URL takes precedence over an uploaded file.
        """
        from html import escape

        # Ignore surrounding whitespace from copy/paste. A blank URL must not
        # shadow an uploaded file, and a padded URL must still be recognized.
        video_url = (video_url or "").strip()
        video_source = video_url if video_url else video_file

        yield (
            gr.Textbox(value="⏳ Processing started - please be patient...", visible=True),
            gr.Slider(visible=True, value=0),
            gr.HTML(value="", visible=True),  # Clear language status
            gr.HTML(value="", visible=False),  # Hide previous HTML output
            gr.Audio(value=None, visible=True, label="Extracted Audio (Full Duration)"),
            gr.HTML(value="", visible=False)  # Hide previous error output
        )

        try:
            lang_status, results_html, audio_path, error = process_video_unified(video_source, analysis_duration, progress)

            if error:
                # On failure the message goes to error_output; results stay hidden.
                yield (
                    gr.Textbox(value="❌ Processing failed", visible=True),
                    gr.Slider(visible=False),
                    gr.HTML(value=lang_status, visible=True),
                    gr.HTML(value="", visible=False),
                    gr.Audio(value=audio_path, visible=True, label="Extracted Audio (Full Duration)"),
                    gr.HTML(value=results_html, visible=True)
                )
            else:
                yield (
                    gr.Textbox(value="✅ Analysis complete!", visible=True),
                    gr.Slider(value=1.0, visible=False),
                    gr.HTML(value=lang_status, visible=True),
                    gr.HTML(value=results_html, visible=True),
                    gr.Audio(value=audio_path, visible=True, label="Extracted Audio (Full Duration)"),
                    gr.HTML(visible=False)
                )
        except Exception as e:
            # Safety net for anything the processing function did not handle.
            yield (
                gr.Textbox(value="❌ An unexpected error occurred!", visible=True),
                gr.Slider(visible=False),
                gr.HTML(value="", visible=True),
                gr.HTML(value="", visible=False),
                gr.Audio(value=None, visible=True, label="Extracted Audio (Full Duration)"),
                gr.HTML(value=f"<p style='color: red; font-weight: bold;'>⚠️ Unexpected Error: {escape(str(e))}</p>", visible=True)
            )

    def clear_inputs():
        """
        Click handler for "Clear Input": resets every input and output.

        The returned tuple matches the outputs wired up in clear_btn.click.
        """
        return (
            "",  # url_input
            None,  # video_input
            60,  # analysis_duration (reset to default)
            "Waiting for video input...",  # status_box
            gr.Slider(visible=False, value=0),  # progress_bar (hidden and reset)
            "",  # language_status_html (clear)
            "",  # output_html (clear)
            gr.Audio(visible=True, value=None, label="Extracted Audio (Full Duration)"),
            ""  # error_output (clear)
        )

    submit_btn.click(
        fn=unified_processing_fn,
        inputs=[url_input, video_input, analysis_duration],
        outputs=[status_box, progress_bar, language_status_html, output_html, audio_player, error_output],
        api_name="classify_video"
    )

    clear_btn.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[url_input, video_input, analysis_duration, status_box, progress_bar, language_status_html, output_html, audio_player, error_output],
    )

if __name__ == "__main__":
    app.launch(share=True)