EladSpamson committed
Commit 886af50 · verified · 1 Parent(s): e2ba5da

Update app.py

Files changed (1)
  1. app.py +28 -21
app.py CHANGED
@@ -1,25 +1,31 @@
-import gradio as gr
+import requests
 import torch
 import librosa
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
+from flask import Flask, request, jsonify
 
-model_id = "ivrit-ai/whisper-large-v3-turbo"
+app = Flask(__name__)
+
+model_id = "openai/whisper-large-v3"
 processor = WhisperProcessor.from_pretrained(model_id)
 model = WhisperForConditionalGeneration.from_pretrained(model_id)
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
-# Force Hebrew transcription
 forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")
 
-def transcribe_audio(audio_file):
-    waveform, sr = librosa.load(audio_file, sr=16000)
-    max_audio_sec = 3600  # recommended 1-hour limit for stability
-    waveform = waveform[:sr * max_audio_sec]
+def transcribe_audio(audio_url):
+    response = requests.get(audio_url)
+    with open("temp_audio.wav", "wb") as f:
+        f.write(response.content)
+
+    waveform, sr = librosa.load("temp_audio.wav", sr=16000)
+    max_duration_sec = 3600
+    waveform = waveform[:sr * max_duration_sec]
 
-    chunk_duration_s = 25
-    chunk_size = sr * chunk_duration_s
+    chunk_duration_sec = 25
+    chunk_size = sr * chunk_duration_sec
     chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size)]
 
     partial_text = ""
@@ -30,23 +36,24 @@ def transcribe_audio(audio_file):
         with torch.no_grad():
             predicted_ids = model.generate(
                 input_features,
-                max_new_tokens=444,
                 forced_decoder_ids=forced_decoder_ids
             )
 
-            text_chunk = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-            partial_text += text_chunk + "\n"
-
-    return partial_text  # no yield, just final result
+            transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+            partial_text += transcription + "\n"
 
-with gr.Blocks() as demo:
-    gr.Markdown("## Exceedea Transcription")
+    return partial_text.strip()
 
-    audio_input = gr.Audio(type="filepath", label="Upload Audio (Truncate to 1 hour)")
-    output_text = gr.Textbox(label="Full Transcription")
+@app.route('/transcribe', methods=['POST'])
+def transcribe_endpoint():
+    data = request.get_json()
+    audio_url = data.get('audio_url')
+    if not audio_url:
+        return jsonify({"error": "Missing 'audio_url' in request"}), 400
 
-    start_btn = gr.Button("Start Transcription")
+    transcription = transcribe_audio(audio_url)
 
-    start_btn.click(transcribe_audio, inputs=audio_input, outputs=output_text)
+    return jsonify({"transcription": transcription})
 
-demo.launch()
+if __name__ == '__main__':
+    app.run(host="0.0.0.0", port=8080)
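
Note: the per-chunk loop that builds `input_features` sits in the unchanged lines the hunks skip over (old 26-29 / new 32-35), which is why it appears in the context above without a visible definition. For reference, the `/transcribe` route added by this commit can be exercised with a minimal client such as the sketch below; the host and port follow the `app.run` call in the diff, while the sample audio URL and the timeout are placeholders, not part of the change.

import requests

# Minimal client sketch for the /transcribe route added in this commit.
# The audio URL is a placeholder; any publicly downloadable audio file should work.
resp = requests.post(
    "http://localhost:8080/transcribe",
    json={"audio_url": "https://example.com/sample.wav"},
    timeout=600,  # long recordings can take several minutes to transcribe
)
resp.raise_for_status()
print(resp.json()["transcription"])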