File size: 4,140 Bytes
d420f32
 
 
 
1c03892
 
d420f32
 
 
 
a414737
 
 
 
d420f32
a414737
d420f32
a414737
d420f32
1c03892
 
 
 
 
 
 
 
 
 
 
 
 
d420f32
a414737
d420f32
 
a414737
d420f32
 
 
 
a414737
d420f32
 
 
a414737
d420f32
 
a414737
d420f32
 
a414737
d420f32
 
 
a414737
d420f32
 
 
 
 
93157c8
a414737
d420f32
 
93157c8
d420f32
 
 
 
 
 
 
 
 
 
a414737
d420f32
 
 
 
 
 
 
 
a414737
d420f32
 
 
 
 
 
 
 
a414737
d420f32
 
93157c8
1c03892
d420f32
a414737
d420f32
 
 
93157c8
d420f32
93157c8
d420f32
 
 
 
 
a414737
d420f32
 
 
a414737
d420f32
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
import time
from datetime import datetime
from zoneinfo import ZoneInfo

import pytz
import whisper
from flask import Flask, request, jsonify


# Module-level Flask application; the route decorators below register against it.
app = Flask(__name__)

@app.route("/")
def hello():
    """Root endpoint: return a plain-text liveness banner."""
    banner = "Semabox, listens to you!"
    return banner
    
# Load the Whisper model
# NOTE: this runs at import time, so the first request is not delayed by
# model loading; "tiny" is the smallest/fastest Whisper checkpoint.
print("Loading Whisper model...", flush=True)
model = whisper.load_model("tiny")
print("Whisper model loaded.", flush=True)

# Get time of request
def get_time():
    """Return the current Nairobi wall-clock time for request logging.

    Returns:
        tuple[str, str]: (full_date, curr_time) where full_date is
        "Weekday | YYYY-MM-DD | HH:MM:SS" and curr_time is "HH:MM:SS".

    Uses the stdlib ``zoneinfo`` database (PEP 615) instead of the legacy
    third-party ``pytz`` module; ``datetime.now(tz)`` is the correct,
    idiomatic way to get an aware local time with zoneinfo.
    """
    now_nairobi = datetime.now(ZoneInfo('Africa/Nairobi'))

    curr_day = now_nairobi.strftime('%A')
    curr_date = now_nairobi.strftime('%Y-%m-%d')
    curr_time = now_nairobi.strftime('%H:%M:%S')

    full_date = f"{curr_day} | {curr_date} | {curr_time}"
    return full_date, curr_time

    
def transcribe(audio_path):
    """Run the global Whisper model over one audio file.

    Args:
        audio_path: filesystem path of the audio to transcribe.

    Returns:
        tuple: (transcribed text, detected language code).
    """
    print(f"Transcribing audio from: {audio_path}", flush=True)

    # Whisper operates on fixed 30-second windows, so pad/trim the waveform.
    print("Loading and processing audio...", flush=True)
    waveform = whisper.pad_or_trim(whisper.load_audio(audio_path))

    # Spectrogram must live on the same device as the model's weights.
    print("Creating log-Mel spectrogram...", flush=True)
    mel_spec = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Pick the language with the highest predicted probability.
    print("Detecting language...", flush=True)
    _, lang_probs = model.detect_language(mel_spec)
    detected_lang = max(lang_probs, key=lang_probs.get)
    print(f"Detected language: {detected_lang}", flush=True)

    # fp16=False keeps decoding in float32 (safe on CPU-only hosts).
    print("Decoding audio...", flush=True)
    decoded = whisper.decode(model, mel_spec, whisper.DecodingOptions(fp16=False))

    print("Transcription complete.", flush=True)
    return decoded.text, detected_lang

@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Handle POST /transcribe: accept a multipart 'audio' file and return JSON.

    Response JSON carries the transcription, detected language, request/response
    timestamps, transcription duration (seconds), and upload size in bytes.
    Returns 400 when no 'audio' part is present, 500 on transcription failure.
    """
    # Record the time when the request was received
    request_received_time, _ = get_time()
    print(f"Received request at /transcribe at {request_received_time}", flush=True)

    if 'audio' not in request.files:
        print("Error: No audio file provided", flush=True)
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']
    audio_file_size = len(audio_file.read())  # Calculate the size of the file in bytes
    audio_file.seek(0)  # Reset the file pointer after reading

    # basename() strips any client-supplied directory components so a
    # malicious filename (e.g. "../../x.wav") cannot escape temp_audio/.
    safe_name = os.path.basename(audio_file.filename)
    os.makedirs("temp_audio", exist_ok=True)
    audio_path = os.path.join("temp_audio", safe_name)
    audio_file.save(audio_path)
    print(f"Audio file saved to: {audio_path} (Size: {audio_file_size} bytes)", flush=True)

    # Record the time before starting transcription
    transcription_start_time = time.time()

    # Transcribe the audio; the finally clause guarantees the temp file is
    # removed even on failure (the original leaked it on the error path).
    try:
        transcription, language = transcribe(audio_path)
    except Exception as e:
        print(f"Error during transcription: {str(e)}", flush=True)
        return jsonify({"error": f"An error occurred: {str(e)}"}), 500
    finally:
        os.remove(audio_path)
        print(f"Audio file removed from: {audio_path}", flush=True)

    # Calculate the time taken for transcription
    transcription_end_time = time.time()
    transcription_duration = transcription_end_time - transcription_start_time

    # Record the time when the response is being sent
    response_sent_time, _ = get_time()

    # Return the transcription, detected language, and timing information
    print(f"Transcription: {transcription}, Language: {language}", flush=True)
    return jsonify({
        "transcription": transcription,
        "language": language,
        "request_received_time": request_received_time,
        "transcription_duration": transcription_duration,
        "response_sent_time": response_sent_time,
        "audio_file_size_bytes": audio_file_size
    }), 200

@app.route('/healthcheck', methods=['GET'])
def healthcheck():
    """Liveness probe: report that the API process is serving requests."""
    print("Received request at /healthcheck", flush=True)
    payload = {"status": "API is running"}
    return jsonify(payload), 200

# Development entry point: Flask's built-in server, bound to all interfaces
# (0.0.0.0) on port 5000 so the service is reachable from outside a container.
if __name__ == '__main__':
    print("Starting Flask app...", flush=True)
    app.run(host="0.0.0.0", port=5000)