Spaces:
Sleeping
Sleeping
Commit
·
09a8733
1
Parent(s):
f0fa4f3
updated fix for mp4 videos
Browse files
app.py
CHANGED
@@ -13,9 +13,14 @@ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtra
|
|
13 |
TEMP_VIDEO = "temp_video.mp4"
|
14 |
RAW_AUDIO = "raw_audio_input"
|
15 |
CONVERTED_AUDIO = "converted_audio.wav"
|
16 |
-
MODEL_DIR = "model"
|
17 |
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
MODEL_REPO = "ylacombe/accent-classifier"
|
20 |
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_REPO, cache_dir="hf_model_cache")
|
21 |
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_REPO)
|
@@ -28,12 +33,45 @@ LABELS = [model.config.id2label[i] for i in range(len(model.config.id2label))]
|
|
28 |
|
29 |
# === Download video from URL ===
|
30 |
def download_video(url, filename=TEMP_VIDEO):
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
# === Extract audio from video ===
|
39 |
def extract_audio_from_video(video_path, output_path=RAW_AUDIO + ".mp4"):
|
|
|
13 |
TEMP_VIDEO = "temp_video.mp4"
|
14 |
RAW_AUDIO = "raw_audio_input"
|
15 |
CONVERTED_AUDIO = "converted_audio.wav"
|
|
|
16 |
|
17 |
+
|
18 |
+
# === Load local model (disabled; kept for offline use) ===
|
19 |
+
# MODEL_DIR = "model"
|
20 |
+
# model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_DIR, local_files_only=True)
|
21 |
+
# feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_DIR)
|
22 |
+
|
23 |
+
# === Load model and feature extractor from Hugging Face ===
|
24 |
MODEL_REPO = "ylacombe/accent-classifier"
|
25 |
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_REPO, cache_dir="hf_model_cache")
|
26 |
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_REPO)
|
|
|
33 |
|
34 |
# === Download video from URL ===
|
35 |
def download_video(url, filename=TEMP_VIDEO):
    """Download a video from ``url`` and remux it with ffmpeg into ``filename``.

    The response is streamed to a scratch file (``raw_download.mp4``) and then
    stream-copied by ffmpeg with ``-movflags +faststart`` into ``filename``.
    The scratch file is removed whether the call succeeds or fails.

    Args:
        url: HTTP(S) URL expected to serve a ``video/*`` Content-Type.
        filename: Path of the remuxed output file.

    Returns:
        ``filename``, the path of the remuxed video.

    Raises:
        RuntimeError: If the download fails, the response's Content-Type does
            not start with ``video/``, or ffmpeg exits non-zero / produces a
            missing or empty output file.
    """
    temp_download = "raw_download.mp4"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }

    try:
        try:
            # Use the response as a context manager so the streamed
            # connection is released even if we bail out early.
            with requests.get(url, headers=headers, stream=True, timeout=15) as r:
                r.raise_for_status()

                content_type = r.headers.get("Content-Type", "")
                if not content_type.startswith("video/"):
                    raise RuntimeError(f"URL does not point to a video file. Content-Type: {content_type}")

                with open(temp_download, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:  # skip empty keep-alive chunks
                            f.write(chunk)

        except Exception as e:
            # Deliberately broad: every download-stage failure is reported to
            # callers as a RuntimeError with the same message format as before;
            # the original exception is preserved as the cause.
            raise RuntimeError(f"Failed to download video: {e}") from e

        # Attempt to fix the file with ffmpeg (stream copy, no re-encode)
        repaired_file = filename
        ffmpeg_cmd = [
            "ffmpeg", "-y", "-i", temp_download,
            "-c", "copy", "-movflags", "+faststart", repaired_file
        ]
        result = subprocess.run(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        if result.returncode != 0 or not os.path.exists(repaired_file) or os.path.getsize(repaired_file) == 0:
            # ffmpeg output is not guaranteed to be valid UTF-8.
            print(result.stderr.decode(errors="replace"))
            raise RuntimeError("FFmpeg failed to process the video. File may not be a valid MP4.")

        return repaired_file
    finally:
        # Always clean up the scratch download, including on failure paths.
        try:
            os.remove(temp_download)
        except FileNotFoundError:
            pass
|
72 |
+
|
73 |
+
|
74 |
+
|
75 |
|
76 |
# === Extract audio from video ===
|
77 |
def extract_audio_from_video(video_path, output_path=RAW_AUDIO + ".mp4"):
|
test.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Manual diagnostic: fetch a sample URL and print what the server returns."""
import requests

url = "https://store3.gofile.io/download/web/7a1f0c47-93e5-45c1-90b3-e05cb8611501/sample-file.mp4"


def main():
    """Request ``url`` and print its Content-Type, size and leading bytes."""
    # A timeout keeps the script from hanging forever on an unresponsive host.
    r = requests.get(url, allow_redirects=True, timeout=15)

    print("Content-Type:", r.headers.get("Content-Type"))
    print("File size (bytes):", len(r.content))
    print("First 200 bytes:\n", r.content[:200])


# Guarded so that importing this module (e.g. during test collection)
# does not trigger a network request.
if __name__ == "__main__":
    main()
|