EladSpamson committed on
Commit
040da24
·
verified ·
1 Parent(s): aa43ea6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -26,23 +26,40 @@ def transcribe_audio(audio_url):
26
  with open(audio_path, "wb") as f:
27
  f.write(response.content)
28
 
 
29
  waveform, sr = librosa.load(audio_path, sr=16000)
 
 
30
  max_duration_sec = 3600
31
  waveform = waveform[:sr * max_duration_sec]
32
 
 
33
  chunk_duration_sec = 25
34
  chunk_size = sr * chunk_duration_sec
35
- chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size)]
36
 
37
  partial_text = ""
38
  for chunk in chunks:
39
- inputs = processor(chunk, sampling_rate=16000, return_tensors="pt", padding=True)
 
 
 
 
 
40
  input_features = inputs.input_features.to(device)
41
 
 
42
  with torch.no_grad():
43
- predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
 
 
 
 
 
 
 
 
44
 
45
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
46
  partial_text += transcription + "\n"
47
 
48
  return partial_text.strip()
@@ -55,7 +72,6 @@ def transcribe_endpoint():
55
  return jsonify({"error": "Missing 'audio_url' in request"}), 400
56
 
57
  transcription = transcribe_audio(audio_url)
58
-
59
  return jsonify({"transcription": transcription})
60
 
61
  if __name__ == '__main__':
 
26
  with open(audio_path, "wb") as f:
27
  f.write(response.content)
28
 
29
+ # Load audio
30
  waveform, sr = librosa.load(audio_path, sr=16000)
31
+
32
+ # Safety limit (1 hour)
33
  max_duration_sec = 3600
34
  waveform = waveform[:sr * max_duration_sec]
35
 
36
+ # Split into smaller chunks
37
  chunk_duration_sec = 25
38
  chunk_size = sr * chunk_duration_sec
39
+ chunks = [waveform[i : i + chunk_size] for i in range(0, len(waveform), chunk_size)]
40
 
41
  partial_text = ""
42
  for chunk in chunks:
43
+ inputs = processor(
44
+ chunk,
45
+ sampling_rate=16000,
46
+ return_tensors="pt",
47
+ padding=True
48
+ )
49
  input_features = inputs.input_features.to(device)
50
 
51
+ # Generate text
52
  with torch.no_grad():
53
+ predicted_ids = model.generate(
54
+ input_features,
55
+ forced_decoder_ids=forced_decoder_ids
56
+ )
57
+
58
+ transcription = processor.batch_decode(
59
+ predicted_ids,
60
+ skip_special_tokens=True
61
+ )[0]
62
 
 
63
  partial_text += transcription + "\n"
64
 
65
  return partial_text.strip()
 
72
  return jsonify({"error": "Missing 'audio_url' in request"}), 400
73
 
74
  transcription = transcribe_audio(audio_url)
 
75
  return jsonify({"transcription": transcription})
76
 
77
  if __name__ == '__main__':