Spaces:

usamaijaz-ai
/

accent-classifier

Sleeping

App Files Files Community

usamaijaz-ai committed on May 9

Commit

09a8733

1 Parent(s): f0fa4f3

updated fix for mp4 videos

Browse files

Files changed (2) hide show

app.py +46 -8
test.py +8 -0

app.py CHANGED Viewed

@@ -13,9 +13,14 @@ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtra
 TEMP_VIDEO = "temp_video.mp4"
 RAW_AUDIO = "raw_audio_input"
 CONVERTED_AUDIO = "converted_audio.wav"
-MODEL_DIR = "model"
-# === Load model and feature extractor ===
 MODEL_REPO = "ylacombe/accent-classifier"
 model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_REPO, cache_dir="hf_model_cache")
 feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_REPO)
@@ -28,12 +33,45 @@ LABELS = [model.config.id2label[i] for i in range(len(model.config.id2label))]
 # === Download video from URL ===
 def download_video(url, filename=TEMP_VIDEO):
-    r = requests.get(url, stream=True)
-    r.raise_for_status()
-    with open(filename, 'wb') as f:
-        for chunk in r.iter_content(chunk_size=8192):
-            f.write(chunk)
-    return filename
 # === Extract audio from video ===
 def extract_audio_from_video(video_path, output_path=RAW_AUDIO + ".mp4"):

 TEMP_VIDEO = "temp_video.mp4"
 RAW_AUDIO = "raw_audio_input"
 CONVERTED_AUDIO = "converted_audio.wav"
+# === Load local model (disabled) ===
+# MODEL_DIR = "model"
+# model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_DIR, local_files_only=True)
+# feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_DIR)
+# === Load model and feature extractor from the Hugging Face Hub ===
 MODEL_REPO = "ylacombe/accent-classifier"
 model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_REPO, cache_dir="hf_model_cache")
 feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_REPO)
 # === Download video from URL ===
 def download_video(url, filename=TEMP_VIDEO):
+    import mimetypes
+    temp_download = "raw_download.mp4"
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
+    }
+    try:
+        r = requests.get(url, headers=headers, stream=True, timeout=15)
+        r.raise_for_status()
+        content_type = r.headers.get("Content-Type", "")
+        if not content_type.startswith("video/"):
+            raise RuntimeError(f"URL does not point to a video file. Content-Type: {content_type}")
+        with open(temp_download, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+    except Exception as e:
+        raise RuntimeError(f"Failed to download video: {e}")
+    # Attempt to fix the file with ffmpeg
+    repaired_file = filename
+    ffmpeg_cmd = [
+        "ffmpeg", "-y", "-i", temp_download,
+        "-c", "copy", "-movflags", "+faststart", repaired_file
+    ]
+    result = subprocess.run(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if result.returncode != 0 or not os.path.exists(repaired_file) or os.path.getsize(repaired_file) == 0:
+        print(result.stderr.decode())
+        raise RuntimeError("FFmpeg failed to process the video. File may not be a valid MP4.")
+    os.remove(temp_download)
+    return repaired_file
 # === Extract audio from video ===
 def extract_audio_from_video(video_path, output_path=RAW_AUDIO + ".mp4"):

test.py ADDED Viewed

	@@ -0,0 +1,8 @@

+import requests
+url = "https://store3.gofile.io/download/web/7a1f0c47-93e5-45c1-90b3-e05cb8611501/sample-file.mp4"
+r = requests.get(url, allow_redirects=True)
+print("Content-Type:", r.headers.get("Content-Type"))
+print("File size (bytes):", len(r.content))
+print("First 200 bytes:\n", r.content[:200])