Spaces:
Runtime error
Runtime error
import os
import tempfile

import librosa
import requests
import torch
from flask import Flask, request, jsonify
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Flask application object; the server entry point at the bottom of the file runs it.
app = Flask(__name__)

# Temporarily using smaller model for faster testing
model_id = "openai/whisper-base"

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the processor and the model weights once at import time so that every
# request reuses the same objects.
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)

# Decoder prompt ids that pin generation to Hebrew ("he") transcription.
forced_decoder_ids = processor.get_decoder_prompt_ids(
    language="he",
    task="transcribe",
)
def transcribe_audio(audio_url):
    """Download the audio at ``audio_url`` and return its transcription.

    The audio is fetched over HTTP, decoded and resampled to 16 kHz by
    ``librosa.load``, truncated to its first hour, and split into 25-second
    chunks. Each chunk is transcribed with the module-level ``processor`` and
    ``model`` using ``forced_decoder_ids``.

    Args:
        audio_url: URL of the audio file to download.

    Returns:
        The per-chunk transcriptions joined with newlines, with surrounding
        whitespace stripped. Empty audio yields an empty string.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    sample_rate = 16000
    max_duration_sec = 3600
    chunk_duration_sec = 25

    # (connect, read) timeouts so a stalled server cannot hang the worker.
    response = requests.get(audio_url, timeout=(10, 300))
    # Fail here on 4xx/5xx instead of handing an error page to the decoder.
    response.raise_for_status()

    # A unique temporary file keeps concurrent requests from overwriting each
    # other's audio; it is always removed once decoding has finished.
    fd, temp_path = tempfile.mkstemp(suffix=".wav")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(response.content)
        waveform, sr = librosa.load(temp_path, sr=sample_rate)
    finally:
        os.remove(temp_path)

    waveform = waveform[:sr * max_duration_sec]
    chunk_size = sr * chunk_duration_sec

    transcriptions = []
    for start in range(0, len(waveform), chunk_size):
        chunk = waveform[start:start + chunk_size]
        # Whisper's encoder expects a fixed 30-second feature window, so each
        # 25-second chunk is padded to the extractor's maximum length rather
        # than to its own length.
        inputs = processor(
            chunk,
            sampling_rate=sr,
            return_tensors="pt",
            padding="max_length",
        )
        input_features = inputs.input_features.to(device)
        with torch.no_grad():
            predicted_ids = model.generate(
                input_features,
                forced_decoder_ids=forced_decoder_ids,
            )
        transcriptions.append(
            processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        )

    return "\n".join(transcriptions).strip()
# NOTE(review): no route registration (``@app.route`` / ``app.add_url_rule``)
# for this view is visible in this file; confirm it is registered somewhere,
# otherwise the endpoint is unreachable.
def transcribe_endpoint():
    """Handle a transcription request carrying a JSON body.

    Expects a JSON object with an ``audio_url`` key and passes that URL to
    ``transcribe_audio``.

    Returns:
        ``{"transcription": <text>}`` on success.
        ``({"error": ...}, 400)`` when the body is not a JSON object or has
        no non-empty ``audio_url``.
        ``({"error": ...}, 502)`` when downloading the audio fails.
    """
    # silent=True returns None for a missing or malformed JSON body instead of
    # aborting, so every bad request gets the same JSON error shape below.
    data = request.get_json(silent=True)
    # The body may legally be any JSON value (list, null, ...); only a JSON
    # object can carry the key.
    audio_url = data.get('audio_url') if isinstance(data, dict) else None
    if not audio_url:
        return jsonify({"error": "Missing 'audio_url' in request"}), 400

    try:
        transcription = transcribe_audio(audio_url)
    except requests.RequestException as exc:
        # The upstream download failed; report it as a gateway error rather
        # than an unhandled 500.
        return jsonify({"error": f"Failed to download audio: {exc}"}), 502

    return jsonify({"transcription": transcription})
# Start Flask's built-in server only when this file is executed as a script.
if __name__ == "__main__":
    # 0.0.0.0 listens on every network interface, not just localhost.
    # NOTE(review): 7860 is presumably the port the hosting platform expects;
    # confirm before changing it.
    app.run(port=7860, host="0.0.0.0")