File size: 4,749 Bytes
d420f32
 
 
 
1c03892
 
d420f32
 
 
a414737
 
 
 
d420f32
d8bbc70
d420f32
6e13dba
d420f32
1c03892
 
 
 
 
 
 
 
 
 
 
 
a35f83d
 
 
 
 
 
 
 
 
d420f32
b877366
d420f32
 
b877366
d420f32
 
 
 
b877366
d420f32
 
 
b877366
d420f32
 
b877366
d420f32
 
b877366
d420f32
 
 
6e13dba
d420f32
 
 
 
 
93157c8
3201104
d420f32
 
93157c8
d420f32
 
 
a35f83d
d420f32
a35f83d
d420f32
 
 
 
 
6e13dba
d420f32
 
 
 
 
 
 
 
6e13dba
d420f32
 
 
 
a35f83d
d420f32
 
 
6e13dba
d420f32
 
93157c8
1c03892
d420f32
ac82b61
3201104
 
d420f32
 
 
93157c8
a35f83d
93157c8
a35f83d
d420f32
 
 
 
d8bbc70
d420f32
 
 
d8bbc70
d420f32
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
import tempfile
import time
from datetime import datetime

import pytz
import whisper
from flask import Flask, request, jsonify

# Flask application object; the @app.route decorators in this file register
# their handlers on it.
app = Flask(__name__)

@app.route("/")
def hello():
    """Serve the plain-text greeting shown at the site root."""
    greeting = "Semabox, listens to you!"
    return greeting
    
# Load the Whisper model once, at import time; transcribe() below reads this
# module-level `model` for every request instead of reloading it.
# "tiny" is the model name handed to whisper.load_model.
print("Loading Whisper model...\n", flush=True)
model = whisper.load_model("tiny")
print("\nWhisper model loaded.\n", flush=True)

# Get time of request
def get_time():
    """Return the current Africa/Nairobi time formatted for logs and responses.

    Returns:
        tuple[str, str]: ``(full_date, curr_time)``. ``full_date`` has the
        shape ``"<weekday> | <YYYY-MM-DD> | <HH:MM:SS>"`` and ``curr_time``
        is the ``HH:MM:SS`` part on its own.
    """
    now = datetime.now(pytz.timezone('Africa/Nairobi'))

    # One strftime call renders weekday, date and clock with the separators.
    stamp = now.strftime('%A | %Y-%m-%d | %H:%M:%S')
    clock = now.strftime('%H:%M:%S')
    return stamp, clock

# Convert file size from bytes to KB or MB
def convert_size(bytes):
    """Format a byte count as a short human-readable string.

    Args:
        bytes: Size in bytes.

    Returns:
        str: ``"<n> bytes"`` below 1024, ``"<x.xx> KB"`` below 1024**2,
        otherwise ``"<x.xx> MB"`` (binary units, two decimals).
    """
    kb = 1024
    mb = kb ** 2

    # Guard clauses, smallest unit first.
    if bytes < kb:
        return f"{bytes} bytes"
    if bytes < mb:
        return f"{bytes / kb:.2f} KB"
    return f"{bytes / mb:.2f} MB"

def transcribe(audio_path):
    """Transcribe an audio file with the module-level Whisper ``model``.

    The audio is padded or trimmed to fit 30 seconds before decoding.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        tuple: ``(text, language)`` - the decoded text and the key with the
        highest probability reported by ``model.detect_language``.
    """
    # Load the waveform and pad/trim it to fit 30 seconds.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio_path))

    # Log-Mel spectrogram, moved to the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Pick the most probable spoken language.
    _, language_probs = model.detect_language(mel)
    language = max(language_probs, key=language_probs.get)

    # Decode with fp16 disabled.
    decoded = whisper.decode(model, mel, whisper.DecodingOptions(fp16=False))

    print("        Transcription complete.", flush=True)
    return decoded.text, language

def _safe_audio_suffix(filename):
    """Return a harmless file extension (e.g. ``".wav"``) taken from *filename*.

    The client-supplied name is untrusted: only a short, purely alphanumeric
    extension is kept; anything else (or a missing name) yields ``""``.
    """
    _, ext = os.path.splitext(os.path.basename(filename or ""))
    if len(ext) <= 16 and ext[1:].isalnum():
        return ext
    return ""


@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Handle ``POST /transcribe``: transcribe the uploaded ``audio`` file.

    The upload is written to a uniquely named temporary file under
    ``temp_audio/``, passed to ``transcribe`` and always deleted afterwards.

    Returns:
        A ``(json, status)`` pair:
        * 400 with ``{"error": ...}`` when no ``audio`` file part is present.
        * 500 with ``{"error": ...}`` when transcription raises.
        * 200 with ``transcription``, ``language``, ``request_received_time``,
          ``transcription_duration_seconds``, ``response_sent_time`` and
          ``audio_file_size`` on success.
    """
    # Record the time when the request was received
    request_received_time, _ = get_time()
    print(f"Query {request_received_time}", flush=True)

    if 'audio' not in request.files:
        print("Error: No audio file provided", flush=True)
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']

    # Never build the path from the client-supplied filename: it may contain
    # "../" components, be empty, or collide with a concurrent upload.
    # mkstemp gives a unique path inside temp_audio/; only a sanitized
    # extension is carried over from the original name.
    os.makedirs("temp_audio", exist_ok=True)
    fd, audio_path = tempfile.mkstemp(
        suffix=_safe_audio_suffix(audio_file.filename), dir="temp_audio"
    )
    os.close(fd)  # FileStorage.save reopens the path itself

    try:
        audio_file.save(audio_path)
        # Measure the size on disk instead of reading the whole upload
        # into memory just to count its bytes.
        audio_file_size = convert_size(os.path.getsize(audio_path))
        print(f"        Audio file saved to: {audio_path} (Size: {audio_file_size})", flush=True)

        # Time only the transcription itself
        transcription_start_time = time.time()
        try:
            transcription, language = transcribe(audio_path)
        except Exception as e:
            print(f"        Error during transcription: {str(e)}", flush=True)
            return jsonify({"error": f"An error occurred: {str(e)}"}), 500
        transcription_duration = round(time.time() - transcription_start_time, 2)
    finally:
        # Clean up the saved file on every path, including failures
        try:
            os.remove(audio_path)
            print(f"        Audio file removed from: {audio_path}\n", flush=True)
        except OSError as e:
            print(f"        Could not remove audio file {audio_path}: {e}\n", flush=True)

    # Record the time when the response is being sent
    response_sent_time, _ = get_time()

    # Log the result in green (ANSI escape codes)
    print(f"        \033[92mTranscription: {transcription}, Language: {language}, Processing Time: {transcription_duration}\033[0m\n", flush=True)

    return jsonify({
        "transcription": transcription,
        "language": language,
        "request_received_time": request_received_time,
        "transcription_duration_seconds": transcription_duration,
        "response_sent_time": response_sent_time,
        "audio_file_size": audio_file_size
    }), 200

@app.route('/healthcheck', methods=['GET'])
def healthcheck():
    """Handle ``GET /healthcheck``: log the hit and report that the API is up."""
    print("Received request at /healthcheck\n", flush=True)
    status_payload = {"status": "API is running"}
    return jsonify(status_payload), 200

# Start Flask's built-in server only when this file is run as a script.
# host="0.0.0.0" with port=5000 are the bind address and port passed to app.run.
if __name__ == '__main__':
    print("Starting Flask app...\n", flush=True)
    app.run(host="0.0.0.0", port=5000)