"""FastAPI service that classifies an uploaded audio clip as noisy or quiet.

The decision combines the clip's mean RMS energy (computed with librosa)
with the audio-event output of the SenseVoiceSmall model (loaded via FunASR).
"""

import shutil
import tempfile
from pathlib import Path

import librosa
import numpy as np
from fastapi import FastAPI, File, UploadFile
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

app = FastAPI()

# Load the SenseVoiceSmall model once at import time so that every request
# reuses the same instance.
model_dir = "FunAudioLLM/SenseVoiceSmall"
model = AutoModel(
    model=model_dir,
    vad_model="fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
    device="cuda:0",
    hub="hf",
)

# Mean RMS above this value is classified as noisy without consulting the model.
LOUD_RMS_THRESHOLD = 0.02
# Mean RMS above this value (but not above LOUD_RMS_THRESHOLD) is classified
# as noisy only when the model reports a confident noisy event.
MODERATE_RMS_THRESHOLD = 0.01
# Minimum event score for an event to count as detected.
EVENT_SCORE_THRESHOLD = 0.7
# Event labels that mark a moderately loud clip as noisy.
NOISY_EVENT_LABELS = frozenset({"laughter", "applause", "crying", "coughing"})
# Directory in which uploads are staged while they are being analysed.
UPLOAD_DIR = Path("temp")


def calculate_rms_energy(audio_path: str) -> float:
    """Return the mean frame-wise RMS energy of the audio file at *audio_path*.

    The file is loaded with librosa's default settings. An empty signal
    yields ``0.0`` instead of being passed on to the RMS computation.
    """
    samples, _ = librosa.load(audio_path)
    if samples.size == 0:
        return 0.0
    rms = librosa.feature.rms(y=samples)[0]
    return float(np.mean(rms))


def detect_noise(audio_path: str) -> str:
    """Classify the audio file at *audio_path* as noisy or quiet.

    Returns:
        ``"ồn ào"`` when the mean RMS energy exceeds ``LOUD_RMS_THRESHOLD``;
        ``"ồn ào (<label>)"`` when the energy is in the intermediate band and
        the model reports one of ``NOISY_EVENT_LABELS`` with a score above
        ``EVENT_SCORE_THRESHOLD``; ``"yên tĩnh"`` otherwise.
    """
    rms_energy = calculate_rms_energy(audio_path)
    if rms_energy > LOUD_RMS_THRESHOLD:
        return "ồn ào"

    if rms_energy > MODERATE_RMS_THRESHOLD:
        # The model output is only consulted in this band, so inference is
        # skipped entirely for clearly loud and clearly quiet clips.
        res = model.generate(
            input=audio_path, language="auto", audio_event_detection=True
        )
        # NOTE(review): assumes each result is a dict exposing an
        # "audio_event_detection" mapping of label -> score; confirm against
        # the FunASR / SenseVoice output format.
        audio_events = (res[0].get("audio_event_detection") if res else None) or {}
        for event_label, event_score in audio_events.items():
            if event_score > EVENT_SCORE_THRESHOLD and event_label in NOISY_EVENT_LABELS:
                return f"ồn ào ({event_label})"

    return "yên tĩnh"


def _safe_suffix(filename):
    """Return a plain alphanumeric extension (e.g. ``".wav"``) or ``""``.

    Only the extension of the client-supplied name is kept, so nothing else
    from that name can influence where the upload is written.
    """
    suffix = Path(filename or "").suffix
    return suffix if suffix[1:].isalnum() else ""


# API endpoint that receives an audio file from the Flutter client.
@app.post("/detect-noise/")
async def detect_noise_api(file: UploadFile = File(...)):
    """Store the upload in a temporary file, classify it, and return the label.

    The upload is written under ``UPLOAD_DIR`` with a generated unique name
    rather than the client-supplied filename, which prevents path traversal
    and collisions between concurrent uploads. The temporary file is removed
    once analysis finishes, whether or not it succeeded.

    Returns:
        ``{"noise_level": <label>}`` where the label comes from
        :func:`detect_noise`.
    """
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            dir=UPLOAD_DIR, suffix=_safe_suffix(file.filename), delete=False
        ) as buffer:
            tmp_path = Path(buffer.name)
            shutil.copyfileobj(file.file, buffer)
        # NOTE(review): this call blocks the event loop for the duration of
        # the analysis; consider a worker thread if the model is confirmed to
        # be safe for concurrent use.
        result = detect_noise(str(tmp_path))
    finally:
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
    return {"noise_level": result}