import requests
import threading
import torch
import librosa

from flask import Flask, request, jsonify
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# GLOBAL concurrency counter & lock
concurrent_requests = 0
concurrent_requests_lock = threading.Lock()
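# (The counter-plus-lock pair is a hand-rolled single-slot semaphore;
# threading.BoundedSemaphore(1) would be an equivalent, more idiomatic gate.)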

app = Flask(__name__)

model_id = "ivrit-ai/whisper-large-v3-turbo"
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

forced_decoder_ids = processor.get_decoder_prompt_ids(language="he", task="transcribe")
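# Note: recent transformers releases deprecate passing forced_decoder_ids to
# generate() for Whisper; if you upgrade, model.generate(..., language="he",
# task="transcribe") is the suggested replacement.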

WEBHOOK_URL = "https://hook.eu1.make.com/86zogci73u394k2uqpulp5yjjwgm8b9x"


def transcribe_in_background(audio_url, file_id, company, user, file_name):
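    """Download audio_url, transcribe it chunk by chunk, and POST the result
    (or any error) to WEBHOOK_URL. Runs on a worker thread and always releases
    the single concurrency slot in the finally block."""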
    global concurrent_requests
    try:
        # Download audio (fail fast on HTTP errors and hangs)
        r = requests.get(audio_url, timeout=60)
        r.raise_for_status()
        # A single fixed temp path is safe here because only one job runs at a time
        audio_path = "/tmp/temp_audio.wav"
        with open(audio_path, "wb") as f:
            f.write(r.content)
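        # For very large files, requests.get(..., stream=True) with
        # iter_content() would avoid holding the whole download in memory.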

        # Load audio & limit to 1 hour
        waveform, sr = librosa.load(audio_path, sr=16000)
        max_sec = 3600
        waveform = waveform[: sr * max_sec]

        call_duration = int(len(waveform) / sr)

        # Transcribe in 25-second chunks
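        # (Whisper's encoder works on fixed 30-second windows; 25-second chunks
        # keep each pass inside a single window, at the cost of occasionally
        # splitting a word at a chunk boundary.)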
        chunk_sec = 25
        chunk_size = sr * chunk_sec
        chunks = [waveform[i : i + chunk_size] for i in range(0, len(waveform), chunk_size)]

        partial_text = ""
        for chunk in chunks:
            # Use the feature extractor's default "max_length" padding, which
            # pads/truncates features to the 30-second window Whisper expects
            # (padding=True would pad only to the chunk's own length)
            inputs = processor(chunk, sampling_rate=sr, return_tensors="pt")
            input_features = inputs.input_features.to(device)

            with torch.no_grad():
                predicted_ids = model.generate(
                    input_features,
                    forced_decoder_ids=forced_decoder_ids
                )
            transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
            partial_text += transcription + "\n"

        # Send result to webhook
        payload = {
            "Transcription": partial_text.strip(),
            "callDuration": call_duration,
            "fileId": file_id,
            "company": company,
            "user": user,
            "fileName": file_name
        }
        requests.post(WEBHOOK_URL, json=payload, timeout=30)

    except Exception as e:
        error_payload = {
            "error": str(e),
            "fileId": file_id,
            "company": company,
            "user": user,
            "fileName": file_name
        }
        requests.post(WEBHOOK_URL, json=error_payload, timeout=30)

    finally:
        # Decrement concurrency count
        with concurrent_requests_lock:
            concurrent_requests -= 1


@app.route("/transcribe", methods=["POST"])
def transcribe_endpoint():
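    """Accept a job only when the single worker slot is free: reply 202 and
    transcribe on a background thread, delivering results via the webhook.
    A busy server replies 200 with an explanatory message."""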
    global concurrent_requests

    # We only allow ONE job at a time:
    with concurrent_requests_lock:
        if concurrent_requests >= 1:
            # Busy: return 200 (OK) with a JSON message rather than an error status
            return jsonify({
                "message": "Server is already processing another job, please try again later."
            }), 200
        
        # If it's free, occupy the slot
        concurrent_requests += 1

    # silent=True yields None (instead of raising) on a missing or invalid JSON body
    data = request.get_json(silent=True) or {}
    audio_url = data.get("audio_url")
    if not audio_url:
        # If missing the audio_url, free the slot we claimed
        with concurrent_requests_lock:
            concurrent_requests -= 1
        return jsonify({"error": "Missing 'audio_url' in request"}), 400

    # Read headers
    file_id = request.headers.get("fileId", "")
    company = request.headers.get("company", "")
    user = request.headers.get("user", "")
    file_name = request.headers.get("fileName", "")

    # Spawn a background thread
    thread = threading.Thread(
        target=transcribe_in_background,
        args=(audio_url, file_id, company, user, file_name)
    )
    thread.start()

    return jsonify({
        "status": "Received. Transcription in progress.",
        "note": "Results will be sent via webhook once done."
    }), 202


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
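

# Example request (hypothetical values; the metadata travels in headers,
# the audio URL in the JSON body):
#
#   curl -X POST http://localhost:7860/transcribe \
#        -H "Content-Type: application/json" \
#        -H "fileId: 123" -H "company: acme" \
#        -H "user: dana" -H "fileName: call.wav" \
#        -d '{"audio_url": "https://example.com/call.wav"}'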