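"""Flask service that transcribes Hebrew audio with ivrit-ai/whisper-large-v3-turbo.

Accepts a transcription job over HTTP, processes it on a background thread
(one job at a time), and delivers the transcript to a Make.com webhook.
"""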
import requests
import threading
import torch
import librosa
from flask import Flask, request, jsonify
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Global concurrency counter & lock (we allow one transcription job at a time)
concurrent_requests = 0
concurrent_requests_lock = threading.Lock()

app = Flask(__name__)

# Hebrew-tuned Whisper model
model_id = "ivrit-ai/whisper-large-v3-turbo"
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Pin the decoder to Hebrew transcription
forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")

# Finished transcripts (or errors) are POSTed here
WEBHOOK_URL = "https://hook.eu1.make.com/86zogci73u394k2uqpulp5yjjwgm8b9x"
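
# Background worker: downloads the audio, transcribes it chunk by chunk, and
# POSTs the result (or the error) to WEBHOOK_URL. Note that chunks are decoded
# independently, so a word straddling a 25-second boundary may be split.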
def transcribe_in_background(audio_url, file_id, company, user, file_name):
    global concurrent_requests
    try:
        # Download the audio file to a temporary path
        r = requests.get(audio_url)
        r.raise_for_status()  # fail fast instead of transcribing an error page
        audio_path = "/tmp/temp_audio.wav"
        with open(audio_path, "wb") as f:
            f.write(r.content)

        # Load audio at Whisper's 16 kHz sample rate and cap it at 1 hour
        waveform, sr = librosa.load(audio_path, sr=16000)
        max_sec = 3600
        waveform = waveform[: sr * max_sec]
        call_duration = int(len(waveform) / sr)

        # Transcribe in 25-second chunks (within Whisper's 30-second window)
        chunk_sec = 25
        chunk_size = sr * chunk_sec
        chunks = [waveform[i : i + chunk_size] for i in range(0, len(waveform), chunk_size)]

        partial_text = ""
        for chunk in chunks:
            inputs = processor(chunk, sampling_rate=sr, return_tensors="pt", padding=True)
            input_features = inputs.input_features.to(device)
            with torch.no_grad():
                predicted_ids = model.generate(
                    input_features,
                    forced_decoder_ids=forced_decoder_ids
                )
            transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
            partial_text += transcription + "\n"

        # Send the full transcript to the webhook
        payload = {
            "Transcription": partial_text.strip(),
            "callDuration": call_duration,
            "fileId": file_id,
            "company": company,
            "user": user,
            "fileName": file_name
        }
        requests.post(WEBHOOK_URL, json=payload)
    except Exception as e:
        # Report failures to the same webhook so the caller is never left waiting
        error_payload = {
            "error": str(e),
            "fileId": file_id,
            "company": company,
            "user": user,
            "fileName": file_name
        }
        requests.post(WEBHOOK_URL, json=error_payload)
    finally:
        # Free the single processing slot
        with concurrent_requests_lock:
            concurrent_requests -= 1
# Route path assumed; the original listing omitted the @app.route decorator,
# without which Flask never registers the endpoint.
@app.route("/transcribe", methods=["POST"])
def transcribe_endpoint():
    global concurrent_requests

    # We only allow ONE job at a time; check and claim the slot atomically
    with concurrent_requests_lock:
        if concurrent_requests >= 1:
            # Return 200 (OK) with a JSON message rather than an error status
            return jsonify({
                "message": "Server is already processing another job, please try again later."
            }), 200
        # The slot is free, so occupy it
        concurrent_requests += 1

    data = request.get_json(silent=True) or {}  # tolerate missing/invalid JSON bodies
    audio_url = data.get("audio_url")
    if not audio_url:
        # Missing audio_url: release the slot we claimed
        with concurrent_requests_lock:
            concurrent_requests -= 1
        return jsonify({"error": "Missing 'audio_url' in request"}), 400

    # Read job metadata from the request headers
    file_id = request.headers.get("fileId", "")
    company = request.headers.get("company", "")
    user = request.headers.get("user", "")
    file_name = request.headers.get("fileName", "")

    # Transcribe on a background thread so this request can return immediately
    thread = threading.Thread(
        target=transcribe_in_background,
        args=(audio_url, file_id, company, user, file_name)
    )
    thread.start()

    return jsonify({
        "status": "Received. Transcription in progress.",
        "note": "Results will be sent via webhook once done."
    }), 202
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
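
# --- Usage sketch ---
# A minimal client-side example, assuming the "/transcribe" route path added
# above and a server listening on localhost:7860. All header values and the
# audio URL below are placeholders; the transcript is delivered to
# WEBHOOK_URL, not returned in this HTTP response.
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/transcribe",
#       json={"audio_url": "https://example.com/recordings/call.wav"},
#       headers={"fileId": "12345", "company": "example-co",
#                "user": "example-user", "fileName": "call.wav"},
#   )
#   print(resp.status_code, resp.json())  # 202 once the job is accepted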