"""SemaBox: a small Flask service that transcribes uploaded audio with Whisper.

Routes:
    GET  /             plain-text greeting
    POST /whisper      transcribe the upload with ``model.transcribe``
    POST /transcribe   detect the language and decode the upload
    GET  /healthcheck  liveness probe
"""

import os
from tempfile import NamedTemporaryFile

import whisper
from flask import Flask, abort, jsonify, request
from flask_cors import CORS

# Load Whisper model once at import time so every request reuses it.
print("\nLoading Whisper\n", flush=True)
model = whisper.load_model("small")

# Initialize Flask app
app = Flask(__name__)
CORS(app)
print("\nHello, welcome to SemaBox\n", flush=True)


def _transcribe_with_language(audio):
    """Decode an audio file and report the detected language.

    Args:
        audio: Path of the audio file to load.

    Returns:
        A ``(text, language)`` tuple: the decoded text and the key of the
        highest-probability entry returned by ``model.detect_language``.
    """
    # Load audio and pad/trim it to fit 30 seconds.
    samples = whisper.load_audio(audio)
    samples = whisper.pad_or_trim(samples)

    # Make log-Mel spectrogram and move to the same device as the model.
    mel = whisper.log_mel_spectrogram(samples).to(model.device)

    # Detect the spoken language.
    _, probs = model.detect_language(mel)
    language = max(probs, key=probs.get)
    print(f"Detected language: {language}")

    # Decode the audio.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text, language


def transcribe(audio):
    """Return the decoded text for the audio file at path ``audio``.

    Kept as a text-only wrapper so existing callers keep receiving a plain
    string; use ``_transcribe_with_language`` when the language is needed too.
    """
    text, _ = _transcribe_with_language(audio)
    return text


@app.route("/")
def hello():
    """Return a plain-text greeting."""
    return "Semabox, listens to you!"


# NOTE: this handler used to share the name ``transcribe_audio`` with the
# /transcribe handler below. Flask derives the endpoint name from the function
# name, so the duplicate made route registration fail at import time.
@app.route('/whisper', methods=['POST'])
def transcribe_audio_whisper():
    """Transcribe the uploaded ``audio`` file with ``model.transcribe``.

    Aborts with 400 when the request carries no ``audio`` file. Otherwise
    responds with JSON holding the upload's ``filename`` and the ``transcript``.
    """
    if 'audio' not in request.files:
        # If no audio file is submitted, return a 400 (Bad Request) error.
        abort(400, description="No audio file provided")

    audio_file = request.files['audio']

    # Create a temporary file to save the uploaded audio; it is removed when
    # the ``with`` block exits, whether or not transcription succeeded.
    with NamedTemporaryFile(suffix=".wav", delete=True) as temp:
        audio_file.save(temp.name)
        # Perform transcription using the Whisper model.
        result = model.transcribe(temp.name)

    # Return the transcribed text in JSON format.
    return jsonify({
        'filename': audio_file.filename,
        'transcript': result['text'],
    })


# Define the route for transcription
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe the uploaded ``audio`` file and report its language.

    Responds with ``{"error": ...}`` and status 400 when no ``audio`` file is
    included, otherwise with ``{"transcription": ..., "language": ...}`` and
    status 200.
    """
    # Check if an audio file is included in the request
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']

    # Save the upload under a server-chosen temporary name instead of the
    # client-supplied filename: that name is untrusted (it may contain path
    # components such as "../"), and the temp file is cleaned up even when
    # transcription raises.
    with NamedTemporaryFile(suffix=".wav", delete=True) as temp:
        audio_file.save(temp.name)
        # Transcribe the audio
        transcription, language = _transcribe_with_language(temp.name)

    # Return the transcription and detected language
    return jsonify({"transcription": transcription, "language": language}), 200


# Healthcheck endpoint
@app.route('/healthcheck', methods=['GET'])
def healthcheck():
    """Report that the API process is up."""
    return jsonify({"status": "API is running"}), 200


# Run the Flask app
if __name__ == '__main__':
    app.run(host="0.0.0.0", port=5000)