import os
import tempfile
import requests
import threading
import torch
import librosa
from flask import Flask, request, jsonify
from transformers import WhisperProcessor, WhisperForConditionalGeneration
###############################################################################
# 1) Configure environment & set up model
###############################################################################
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_cache"
os.environ["XDG_CACHE_HOME"] = "/tmp"
app = Flask(__name__)
model_id = "ivrit-ai/whisper-large-v3-turbo"
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
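# On a GPU Space you could roughly halve memory by loading in half precision.
# A sketch, not part of the original setup (input features would then also
# need a matching .half() before generate):
#   model = WhisperForConditionalGeneration.from_pretrained(
#       model_id, torch_dtype=torch.float16
#   ).to(device)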
# Force Hebrew transcription (skip auto-detect)
forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")
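# Note: newer transformers releases deprecate forced_decoder_ids in favour of
# passing language/task straight to generate. A sketch with the same intent:
#   predicted_ids = model.generate(input_features, language="he", task="transcribe")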
# Where we send the final transcription
WEBHOOK_URL = "https://hook.eu1.make.com/86zogci73u394k2uqpulp5yjjwgm8b9x"
###############################################################################
# 2) Background transcription function
###############################################################################
def transcribe_in_background(audio_url, file_id, company, user):
"""
Called by a background thread. Downloads & transcribes audio,
then sends results to your Make.com webhook.
"""
try:
# 1) Download the audio
r = requests.get(audio_url)
audio_path = "/tmp/temp_audio.wav"
with open(audio_path, "wb") as f:
f.write(r.content)
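        # For very large files, a streamed download would avoid holding the
        # whole response in memory. A sketch of that variant:
        #   with requests.get(audio_url, stream=True, timeout=300) as resp:
        #       resp.raise_for_status()
        #       for piece in resp.iter_content(chunk_size=1 << 20):
        #           f.write(piece)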
        # 2) Load with librosa, resampled to the 16 kHz Whisper expects
        waveform, sr = librosa.load(audio_path, sr=16000)
        os.remove(audio_path)  # temp file no longer needed
        # Optional: limit to ~1 hour
        max_sec = 3600
        waveform = waveform[: sr * max_sec]
        # Total duration actually analyzed, truncated to whole seconds
        call_duration = int(len(waveform) / sr)
        # 3) Split the audio into 25-second chunks
        chunk_sec = 25
        chunk_size = sr * chunk_sec
        chunks = [waveform[i : i + chunk_size] for i in range(0, len(waveform), chunk_size)]
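        # Hard 25 s cuts can split a word across chunk boundaries. An
        # overlapping variant is sketched below (the 2 s overlap is an
        # assumed value, not part of the original app):
        #   overlap = sr * 2
        #   step = chunk_size - overlap
        #   chunks = [waveform[i : i + chunk_size]
        #             for i in range(0, len(waveform), step)]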
partial_text = ""
for chunk in chunks:
inputs = processor(chunk, sampling_rate=sr, return_tensors="pt", padding=True)
input_features = inputs.input_features.to(device)
with torch.no_grad():
predicted_ids = model.generate(
input_features,
forced_decoder_ids=forced_decoder_ids
)
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
partial_text += transcription + "\n"
        # 4) Post the final transcription back to Make.com, including the
        # extra fields passed through from the request headers
        payload = {
            "Transcription": partial_text.strip(),
            "callDuration": call_duration,
            "fileId": file_id,
            "company": company,
            "user": user
        }
        requests.post(WEBHOOK_URL, json=payload, timeout=30)
    except Exception as e:
        # In case of errors, notify the webhook instead of failing silently
        error_payload = {
            "error": str(e),
            "fileId": file_id,
            "company": company,
            "user": user
            # "callDuration" could optionally be included here if relevant
        }
        requests.post(WEBHOOK_URL, json=error_payload, timeout=30)
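# The webhook therefore receives either a success payload:
#   {"Transcription": "...", "callDuration": 123,
#    "fileId": "...", "company": "...", "user": "..."}
# or an error payload:
#   {"error": "...", "fileId": "...", "company": "...", "user": "..."}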
###############################################################################
# 3) Flask route: returns immediately, transcribes in a separate thread
###############################################################################
@app.route("/transcribe", methods=["POST"])
def transcribe_endpoint():
    # 1) Get the JSON body (silent=True returns None on a bad or missing
    # body instead of raising, so we can answer with a clean 400)
    data = request.get_json(silent=True) or {}
    audio_url = data.get("audio_url")
    if not audio_url:
        return jsonify({"error": "Missing 'audio_url' in request"}), 400
    # 2) Read custom headers (fileId, company, user)
    file_id = request.headers.get("fileId", "")
    company = request.headers.get("company", "")
    user = request.headers.get("user", "")
    # 3) Spawn a thread to handle the transcription
    thread = threading.Thread(
        target=transcribe_in_background,
        args=(audio_url, file_id, company, user)
    )
    thread.start()
    # 4) Immediately return a JSON response
    return jsonify({
        "status": "Received. Transcription in progress.",
        "note": "Results will be sent via webhook once done."
    }), 202
###############################################################################
# 4) Run app if local; on HF Spaces, gunicorn is used
###############################################################################
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
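# Example client call, as a sketch: the route and header names come from this
# app, while the host and audio URL below are hypothetical.
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/transcribe",
#       json={"audio_url": "https://example.com/call.wav"},
#       headers={"fileId": "123", "company": "Acme", "user": "dana"},
#   )
#   print(resp.status_code, resp.json())  # 202, "Received. Transcription..."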