# Exceedea / app.py
# Origin: Hugging Face Space file (updated by EladSpamson, commit 965be1d,
# 4.11 kB). The lines above were file-viewer page chrome ("raw / history /
# blame"), not code, and have been converted to comments so the module parses.
import json
import os
import tempfile
import threading

import librosa
import requests
import torch
from flask import Flask, request, jsonify
from transformers import WhisperProcessor, WhisperForConditionalGeneration
###############################################################################
# 1) Configure environment to avoid permission issues & set up model
###############################################################################
# Redirect all HF / XDG caches into /tmp, which is writable in containerized
# hosts (e.g. Hugging Face Spaces) where $HOME may be read-only.
# NOTE(review): these are assigned *after* `transformers` is imported above;
# the library reads cache env vars at import time, so they may not take
# effect for this process — confirm, or set them before the imports.
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_cache"
os.environ["XDG_CACHE_HOME"] = "/tmp"
app = Flask(__name__)
# Example: your custom Hebrew model
model_id = "ivrit-ai/whisper-large-v3-turbo"
# Loading happens at import time: first run downloads the weights into
# /tmp/hf_cache, so startup can be slow.
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)
# Prefer GPU when available; fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
# Force Hebrew transcription (skip auto-detect)
forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")
# Where we send the final transcription
WEBHOOK_URL = "https://hook.eu1.make.com/86zogci73u394k2uqpulp5yjjwgm8b9x"
###############################################################################
# 2) Background transcription function
###############################################################################
def transcribe_in_background(audio_url):
    """
    Download the audio at *audio_url*, transcribe it in 25-second chunks
    with the Whisper model, and POST the result (or an error) as JSON to
    the Make.com webhook.

    Runs on a background thread spawned by /transcribe; returns nothing
    and never raises — failures are reported via the webhook instead.
    """
    audio_path = None
    try:
        # Download audio. A timeout keeps a dead URL from hanging this
        # worker thread forever; raise_for_status surfaces HTTP errors
        # instead of "transcribing" an error page.
        r = requests.get(audio_url, timeout=300)
        r.raise_for_status()

        # Unique temp file per request: the previous fixed path
        # /tmp/temp_audio.wav was clobbered by concurrent requests.
        with tempfile.NamedTemporaryFile(suffix=".wav", dir="/tmp", delete=False) as f:
            f.write(r.content)
            audio_path = f.name

        # Decode and resample to 16 kHz mono (Whisper's expected input).
        waveform, sr = librosa.load(audio_path, sr=16000)

        # Optional limit ~1 hour of audio.
        max_sec = 3600
        waveform = waveform[: sr * max_sec]

        # Split audio into 25-second chunks (within Whisper's 30 s window).
        chunk_sec = 25
        chunk_size = sr * chunk_sec
        chunks = [waveform[i : i + chunk_size] for i in range(0, len(waveform), chunk_size)]

        pieces = []
        for chunk in chunks:
            inputs = processor(chunk, sampling_rate=sr, return_tensors="pt", padding=True)
            input_features = inputs.input_features.to(device)
            with torch.no_grad():
                predicted_ids = model.generate(
                    input_features,
                    forced_decoder_ids=forced_decoder_ids  # force Hebrew output
                )
            pieces.append(processor.batch_decode(predicted_ids, skip_special_tokens=True)[0])

        # Post final transcription back to Make.com; join is equivalent to
        # the old "+= text + '\n'" accumulation once stripped.
        payload = {"Transcription": "\n".join(pieces).strip()}
        requests.post(WEBHOOK_URL, json=payload, timeout=60)
    except Exception as e:
        # Top-level boundary for a fire-and-forget thread: no caller to
        # propagate to, so notify the webhook of the failure.
        error_payload = {"error": str(e)}
        requests.post(WEBHOOK_URL, json=error_payload, timeout=60)
    finally:
        # Always remove the temp file, even on failure.
        if audio_path:
            try:
                os.remove(audio_path)
            except OSError:
                pass
###############################################################################
# 3) Flask route: returns immediately, does the heavy lifting in a thread
###############################################################################
@app.route("/transcribe", methods=["POST"])
def transcribe_endpoint():
    """
    Accept {"audio_url": "..."} JSON, spawn a background transcription
    thread, and return 202 immediately; results arrive via the webhook.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising (which previously produced a 500 via AttributeError on
    # `None.get`); we turn that case into the intended 400 response.
    data = request.get_json(silent=True)
    if not data or not data.get("audio_url"):
        return jsonify({"error": "Missing 'audio_url' in request"}), 400
    audio_url = data["audio_url"]

    # Spawn a thread to handle transcription & webhook so the HTTP
    # response returns without waiting on the model.
    thread = threading.Thread(target=transcribe_in_background, args=(audio_url,))
    thread.start()

    # Immediately return a JSON response to Make.com
    return jsonify({
        "status": "Received. Transcription in progress.",
        "note": "Results will be sent via webhook once done."
    }), 202
###############################################################################
# 4) Run app if local, else Hugging Face will use gunicorn.
###############################################################################
if __name__ == "__main__":
    # Local dev entry point: bind all interfaces on 7860 (the port
    # Hugging Face Spaces expects); production serves `app` via gunicorn.
    app.run(port=7860, host="0.0.0.0")