File size: 3,722 Bytes
d420f32
 
 
 
 
 
 
 
 
a414737
 
 
 
d420f32
a414737
d420f32
a414737
d420f32
 
a414737
d420f32
 
a414737
d420f32
 
 
 
a414737
d420f32
 
 
a414737
d420f32
 
a414737
d420f32
 
a414737
d420f32
 
 
a414737
d420f32
 
 
 
 
 
a414737
d420f32
 
 
 
 
 
 
 
 
 
 
 
 
a414737
d420f32
 
 
 
 
 
 
 
a414737
d420f32
 
 
 
 
 
 
 
a414737
d420f32
 
 
 
 
a414737
d420f32
 
 
 
 
 
 
 
 
 
 
a414737
d420f32
 
 
a414737
d420f32
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os
import tempfile
import time
from datetime import datetime

import whisper
from flask import Flask, request, jsonify


# Flask application object; the @app.route decorators in this file register handlers on it.
app = Flask(__name__)

@app.route("/")
def hello():
    """Serve the plain-text greeting shown at the site root."""
    greeting = "Semabox, listens to you!"
    return greeting
    
# Load the "tiny" Whisper model once, at import time, into the module-level
# `model` global. Progress is printed with flush=True so it shows up
# immediately in the process output.
print("Loading Whisper model...", flush=True)
model = whisper.load_model("tiny")
print("Whisper model loaded.", flush=True)

def transcribe(audio_path):
    """Transcribe an audio file with the module-level Whisper ``model``.

    The audio is padded or trimmed to a single 30-second window before
    decoding, so only that window is transcribed.

    Args:
        audio_path: Path of the audio file to load with ``whisper.load_audio``.

    Returns:
        A ``(text, language)`` tuple: the decoded text and the language key
        that received the highest detection probability.
    """
    print(f"Transcribing audio from: {audio_path}", flush=True)

    # Load audio and pad/trim it to fit 30 seconds
    print("Loading and processing audio...", flush=True)
    audio = whisper.pad_or_trim(whisper.load_audio(audio_path))

    # Make log-Mel spectrogram and move to the same device as the model
    print("Creating log-Mel spectrogram...", flush=True)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language
    print("Detecting language...", flush=True)
    _, probs = model.detect_language(mel)
    language = max(probs, key=probs.get)
    print(f"Detected language: {language}", flush=True)

    # Decode the audio. Passing the language we just detected stops the
    # decoder from running its own (identical) detection a second time and
    # guarantees the returned language is the one the text was decoded with.
    print("Decoding audio...", flush=True)
    options = whisper.DecodingOptions(language=language, fp16=False)
    result = whisper.decode(model, mel, options)

    print("Transcription complete.", flush=True)
    return result.text, language

@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe an uploaded audio file and report timing information.

    Expects a POST whose ``audio`` file field carries the recording.

    Returns:
        A ``(response, status)`` tuple:

        * ``200`` with the transcription, detected language, request and
          response timestamps (ISO format), transcription duration in
          seconds and the upload size in bytes;
        * ``400`` when no ``audio`` file part is present;
        * ``500`` with the error message when transcription raises.
    """
    # Record the time when the request was received
    request_received_time = datetime.now()
    print(f"Received request at /transcribe at {request_received_time}", flush=True)

    if 'audio' not in request.files:
        print("Error: No audio file provided", flush=True)
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']

    # Measure the upload by seeking to its end instead of reading the whole
    # payload into memory, then rewind so save() writes it from the start.
    audio_file.seek(0, os.SEEK_END)
    audio_file_size = audio_file.tell()
    audio_file.seek(0)

    # The client-supplied filename is untrusted: joining it into a path
    # allows directory traversal, and two uploads with the same name would
    # overwrite each other. Keep only a plain alphanumeric extension and let
    # tempfile pick a unique name inside temp_audio/.
    os.makedirs("temp_audio", exist_ok=True)
    extension = os.path.splitext(audio_file.filename or "")[1]
    if not extension[1:].isalnum():
        extension = ""
    fd, audio_path = tempfile.mkstemp(suffix=extension, dir="temp_audio")
    os.close(fd)  # save() reopens the path itself

    try:
        audio_file.save(audio_path)
        print(f"Audio file saved to: {audio_path} (Size: {audio_file_size} bytes)", flush=True)

        # Record the time before starting transcription
        transcription_start_time = time.time()

        # Transcribe the audio
        try:
            transcription, language = transcribe(audio_path)
        except Exception as e:
            print(f"Error during transcription: {str(e)}", flush=True)
            return jsonify({"error": f"An error occurred: {str(e)}"}), 500

        # Calculate the time taken for transcription
        transcription_duration = time.time() - transcription_start_time
    finally:
        # Clean up the saved file on every path, including failures, so
        # errors do not leave uploads behind on disk.
        try:
            os.remove(audio_path)
            print(f"Audio file removed from: {audio_path}", flush=True)
        except OSError as cleanup_error:
            print(f"Could not remove audio file {audio_path}: {cleanup_error}", flush=True)

    # Record the time when the response is being sent
    response_sent_time = datetime.now()

    # Return the transcription, detected language, and timing information
    print(f"Transcription: {transcription}, Language: {language}", flush=True)
    return jsonify({
        "transcription": transcription,
        "language": language,
        "request_received_time": request_received_time.isoformat(),
        "transcription_duration": transcription_duration,
        "response_sent_time": response_sent_time.isoformat(),
        "audio_file_size_bytes": audio_file_size
    }), 200

@app.route('/healthcheck', methods=['GET'])
def healthcheck():
    """Log the probe and answer with a fixed JSON status and HTTP 200."""
    print("Received request at /healthcheck", flush=True)
    status_payload = {"status": "API is running"}
    return jsonify(status_payload), 200

# Script entry point: only runs when this file is executed directly, not when
# it is imported. Starts the Flask server listening on every network
# interface (0.0.0.0) at port 5000.
if __name__ == '__main__':
    print("Starting Flask app...", flush=True)
    app.run(host="0.0.0.0", port=5000)