"""Flask web service that transcribes Hebrew audio with Whisper.

POST /transcribe with JSON ``{"audio_url": ...}`` (plus ``fileId`` /
``company`` / ``user`` headers) returns 202 immediately; the actual
transcription runs in a background thread and the result — or an error —
is POSTed to a Make.com webhook.
"""

import os

###############################################################################
# 1) Configure environment & set up model
###############################################################################
# NOTE: cache locations must be exported BEFORE importing transformers /
# huggingface_hub, which read them at import time (the previous version set
# them after the imports, so the redirection could silently not apply).
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_cache"
os.environ["XDG_CACHE_HOME"] = "/tmp"

import json  # noqa: E402,F401  (kept from original file)
import tempfile  # noqa: E402
import threading  # noqa: E402

import librosa  # noqa: E402
import requests  # noqa: E402
import torch  # noqa: E402
from flask import Flask, jsonify, request  # noqa: E402
from transformers import (  # noqa: E402
    WhisperForConditionalGeneration,
    WhisperProcessor,
)

app = Flask(__name__)

model_id = "ivrit-ai/whisper-large-v3-turbo"
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Force Hebrew transcription (skip language auto-detection).
forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")

# Where we send the final transcription (or an error report).
WEBHOOK_URL = "https://hook.eu1.make.com/86zogci73u394k2uqpulp5yjjwgm8b9x"

# Tunables: cap on analysed audio length, Whisper chunk length, and
# (connect, read) timeouts for all outbound HTTP so worker threads can't hang.
MAX_AUDIO_SEC = 3600
CHUNK_SEC = 25
HTTP_TIMEOUT = (10, 300)


###############################################################################
# 2) Background transcription function
###############################################################################
def transcribe_in_background(audio_url, file_id, company, user):
    """Download *audio_url*, transcribe it in chunks, and POST the result.

    Runs in a worker thread spawned by the ``/transcribe`` endpoint.
    On success the webhook receives the transcription plus metadata; on any
    failure it receives an error payload instead (best effort).

    Args:
        audio_url: Publicly fetchable URL of the audio to transcribe.
        file_id, company, user: Opaque metadata echoed back to the webhook.
    """
    audio_path = None
    try:
        # 1) Download the audio to a UNIQUE temp file. (The previous fixed
        #    "/tmp/temp_audio.wav" path was clobbered by concurrent requests.)
        r = requests.get(audio_url, timeout=HTTP_TIMEOUT)
        r.raise_for_status()  # don't "transcribe" an HTTP error page
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(r.content)
            audio_path = tmp.name

        # 2) Load with librosa, resampled to 16 kHz (Whisper's expected rate).
        waveform, sr = librosa.load(audio_path, sr=16000)

        # Optional: limit to ~1 hour of audio.
        waveform = waveform[: sr * MAX_AUDIO_SEC]

        # Total duration actually analysed, rounded down to whole seconds.
        call_duration = int(len(waveform) / sr)

        # 3) Split audio into fixed-size chunks and transcribe each one,
        #    joining the pieces at the end (avoids quadratic += on strings).
        chunk_size = sr * CHUNK_SEC
        pieces = []
        for start in range(0, len(waveform), chunk_size):
            chunk = waveform[start : start + chunk_size]
            inputs = processor(chunk, sampling_rate=sr, return_tensors="pt", padding=True)
            input_features = inputs.input_features.to(device)
            with torch.no_grad():
                predicted_ids = model.generate(
                    input_features, forced_decoder_ids=forced_decoder_ids
                )
            pieces.append(
                processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
            )
        partial_text = "\n".join(pieces)

        # 4) Post final transcription back to Make.com, including extra fields.
        payload = {
            "Transcription": partial_text.strip(),
            "callDuration": call_duration,
            "fileId": file_id,
            "company": company,
            "user": user,
        }
        requests.post(WEBHOOK_URL, json=payload, timeout=HTTP_TIMEOUT)
    except Exception as e:
        # Top-of-thread boundary: notify the webhook so the caller isn't
        # left waiting forever for a result that will never arrive.
        error_payload = {
            "error": str(e),
            "fileId": file_id,
            "company": company,
            "user": user,
            # Could optionally include "callDuration" here if relevant.
        }
        requests.post(WEBHOOK_URL, json=error_payload, timeout=HTTP_TIMEOUT)
    finally:
        # Always remove the downloaded audio (previously leaked on /tmp).
        if audio_path:
            try:
                os.remove(audio_path)
            except OSError:
                pass


###############################################################################
# 3) Flask route: returns immediately, transcribes in a separate thread
###############################################################################
@app.route("/transcribe", methods=["POST"])
def transcribe_endpoint():
    """Accept a transcription job and return 202 immediately.

    Expects JSON ``{"audio_url": ...}``; metadata arrives in the custom
    ``fileId`` / ``company`` / ``user`` request headers. Results are
    delivered asynchronously via the configured webhook.
    """
    # 1) Get JSON data from the request. silent=True makes a missing or
    #    non-JSON body fall through to our own JSON 400 below instead of
    #    Flask's default 415 HTML error.
    data = request.get_json(silent=True) or {}
    audio_url = data.get("audio_url")
    if not audio_url:
        return jsonify({"error": "Missing 'audio_url' in request"}), 400

    # 2) Read custom headers (fileId, company, user).
    file_id = request.headers.get("fileId", "")
    company = request.headers.get("company", "")
    user = request.headers.get("user", "")

    # 3) Spawn a thread to handle the transcription.
    threading.Thread(
        target=transcribe_in_background,
        args=(audio_url, file_id, company, user),
    ).start()

    # 4) Immediately return a JSON response.
    return jsonify({
        "status": "Received. Transcription in progress.",
        "note": "Results will be sent via webhook once done.",
    }), 202


###############################################################################
# 4) Run app if local; on HF Spaces, gunicorn is used
###############################################################################
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)