usamaijaz-ai committed on
Commit
09a8733
·
1 Parent(s): f0fa4f3

updated fix for mp4 videos

Browse files
Files changed (2) hide show
  1. app.py +46 -8
  2. test.py +8 -0
app.py CHANGED
@@ -13,9 +13,14 @@ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtra
13
  TEMP_VIDEO = "temp_video.mp4"
14
  RAW_AUDIO = "raw_audio_input"
15
  CONVERTED_AUDIO = "converted_audio.wav"
16
- MODEL_DIR = "model"
17
 
18
- # === Load model and feature extractor ===
 
 
 
 
 
 
19
  MODEL_REPO = "ylacombe/accent-classifier"
20
  model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_REPO, cache_dir="hf_model_cache")
21
  feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_REPO)
@@ -28,12 +33,45 @@ LABELS = [model.config.id2label[i] for i in range(len(model.config.id2label))]
28
 
29
  # === Download video from URL ===
30
  def download_video(url, filename=TEMP_VIDEO):
31
- r = requests.get(url, stream=True)
32
- r.raise_for_status()
33
- with open(filename, 'wb') as f:
34
- for chunk in r.iter_content(chunk_size=8192):
35
- f.write(chunk)
36
- return filename
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  # === Extract audio from video ===
39
  def extract_audio_from_video(video_path, output_path=RAW_AUDIO + ".mp4"):
 
13
  TEMP_VIDEO = "temp_video.mp4"
14
  RAW_AUDIO = "raw_audio_input"
15
  CONVERTED_AUDIO = "converted_audio.wav"
 
16
 
17
+
18
+ # === Load local model (currently disabled) ===
19
+ # MODEL_DIR = "model"
20
+ # model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_DIR, local_files_only=True)
21
+ # feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_DIR)
22
+
23
+ # === Load model and feature extractor from Hugging Face ===
24
  MODEL_REPO = "ylacombe/accent-classifier"
25
  model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_REPO, cache_dir="hf_model_cache")
26
  feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_REPO)
 
33
 
34
  # === Download video from URL ===
35
  def download_video(url, filename=TEMP_VIDEO):
36
+ import mimetypes
37
+
38
+ temp_download = "raw_download.mp4"
39
+ headers = {
40
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
41
+ }
42
+
43
+ try:
44
+ r = requests.get(url, headers=headers, stream=True, timeout=15)
45
+ r.raise_for_status()
46
+
47
+ content_type = r.headers.get("Content-Type", "")
48
+ if not content_type.startswith("video/"):
49
+ raise RuntimeError(f"URL does not point to a video file. Content-Type: {content_type}")
50
+
51
+ with open(temp_download, 'wb') as f:
52
+ for chunk in r.iter_content(chunk_size=8192):
53
+ f.write(chunk)
54
+
55
+ except Exception as e:
56
+ raise RuntimeError(f"Failed to download video: {e}")
57
+
58
+ # Attempt to fix the file with ffmpeg
59
+ repaired_file = filename
60
+ ffmpeg_cmd = [
61
+ "ffmpeg", "-y", "-i", temp_download,
62
+ "-c", "copy", "-movflags", "+faststart", repaired_file
63
+ ]
64
+ result = subprocess.run(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
65
+
66
+ if result.returncode != 0 or not os.path.exists(repaired_file) or os.path.getsize(repaired_file) == 0:
67
+ print(result.stderr.decode())
68
+ raise RuntimeError("FFmpeg failed to process the video. File may not be a valid MP4.")
69
+
70
+ os.remove(temp_download)
71
+ return repaired_file
72
+
73
+
74
+
75
 
76
  # === Extract audio from video ===
77
  def extract_audio_from_video(video_path, output_path=RAW_AUDIO + ".mp4"):
test.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
# Diagnostic probe: fetch the URL and print what the server actually returns,
# so the response headers and leading bytes can be inspected by hand.
url = "https://store3.gofile.io/download/web/7a1f0c47-93e5-45c1-90b3-e05cb8611501/sample-file.mp4"
# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, allow_redirects=True, timeout=15)

print("Content-Type:", r.headers.get("Content-Type"))
print("File size (bytes):", len(r.content))
print("First 200 bytes:\n", r.content[:200])