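"""Semabox: a small Flask API that transcribes uploaded audio with OpenAI Whisper."""
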
from flask import Flask, request, jsonify
from datetime import datetime
from werkzeug.utils import secure_filename  # sanitize uploaded filenames before saving
import whisper
import os
import time
app = Flask(__name__)


@app.route("/")
def hello():
    return "Semabox, listens to you!"


# Load the Whisper model
print("Loading Whisper model...", flush=True)
model = whisper.load_model("tiny")
print("Whisper model loaded.", flush=True)


def transcribe(audio_path):
    print(f"Transcribing audio from: {audio_path}", flush=True)

    # Load audio and pad/trim it to fit 30 seconds
    print("Loading and processing audio...", flush=True)
    audio = whisper.load_audio(audio_path)
    audio = whisper.pad_or_trim(audio)

    # Make log-Mel spectrogram and move to the same device as the model
    print("Creating log-Mel spectrogram...", flush=True)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language
    print("Detecting language...", flush=True)
    _, probs = model.detect_language(mel)
    language = max(probs, key=probs.get)
    print(f"Detected language: {language}", flush=True)

    # Decode the audio
    print("Decoding audio...", flush=True)
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    print("Transcription complete.", flush=True)

    return result.text, language
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    # Record the time when the request was received
    request_received_time = datetime.now()
    print(f"Received request at /transcribe at {request_received_time}", flush=True)

    if 'audio' not in request.files:
        print("Error: No audio file provided", flush=True)
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']
    audio_file_size = len(audio_file.read())  # Calculate the size of the file in bytes
    audio_file.seek(0)  # Reset the file pointer after reading

    # Save the uploaded audio file under a sanitized name
    audio_path = os.path.join("temp_audio", secure_filename(audio_file.filename))
    os.makedirs("temp_audio", exist_ok=True)
    audio_file.save(audio_path)
    print(f"Audio file saved to: {audio_path} (Size: {audio_file_size} bytes)", flush=True)

    # Record the time before starting transcription
    transcription_start_time = time.time()

    # Transcribe the audio
    try:
        transcription, language = transcribe(audio_path)
    except Exception as e:
        print(f"Error during transcription: {str(e)}", flush=True)
        os.remove(audio_path)  # Clean up the saved file even when transcription fails
        return jsonify({"error": f"An error occurred: {str(e)}"}), 500

    # Calculate the time taken for transcription
    transcription_end_time = time.time()
    transcription_duration = transcription_end_time - transcription_start_time

    # Clean up the saved file
    os.remove(audio_path)
    print(f"Audio file removed from: {audio_path}", flush=True)

    # Record the time when the response is being sent
    response_sent_time = datetime.now()

    # Return the transcription, detected language, and timing information
    print(f"Transcription: {transcription}, Language: {language}", flush=True)
    return jsonify({
        "transcription": transcription,
        "language": language,
        "request_received_time": request_received_time.isoformat(),
        "transcription_duration": transcription_duration,
        "response_sent_time": response_sent_time.isoformat(),
        "audio_file_size_bytes": audio_file_size
    }), 200
@app.route('/healthcheck', methods=['GET'])
def healthcheck():
    print("Received request at /healthcheck", flush=True)
    return jsonify({"status": "API is running"}), 200


if __name__ == '__main__':
    print("Starting Flask app...", flush=True)
    app.run(host="0.0.0.0", port=5000)
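
# Example usage (a sketch, assuming the server is running locally on port 5000
# and "sample.wav" is an audio file you have on disk; the form field name must be "audio"):
#
#   curl -F "audio=@sample.wav" http://localhost:5000/transcribe
#   curl http://localhost:5000/healthcheck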