"""FastAPI service that labels an uploaded audio clip as noisy or quiet.

The decision combines the clip's mean RMS energy with the audio-event output
of the SenseVoiceSmall model loaded through FunASR.
"""

import os
import shutil
import tempfile

import librosa
import numpy as np
import uvicorn
from fastapi import FastAPI, File, UploadFile
from funasr import AutoModel

# Directory where uploads are staged while they are analysed.
TEMP_DIR = "temp"

# Mean-RMS level above which a clip is reported as noisy outright.
LOUD_RMS_THRESHOLD = 0.02
# Mean-RMS level above which the model's audio events are consulted.
MODERATE_RMS_THRESHOLD = 0.01
# Minimum event score for a detected event to count as noise.
EVENT_SCORE_THRESHOLD = 0.7
# Event labels that are treated as noise.
NOISY_EVENT_LABELS = frozenset({"laughter", "applause", "crying", "coughing"})

# Initialise FastAPI.
app = FastAPI()

# Create the temp directory if it does not exist yet.
os.makedirs(TEMP_DIR, exist_ok=True)

# Load the SenseVoiceSmall model from Hugging Face.
model_dir = "FunAudioLLM/SenseVoiceSmall"
model = AutoModel(
    model=model_dir,
    vad_model="fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
    device="cuda:0",
    hub="hf",
)


def calculate_rms_energy(audio_path):
    """Return the mean frame-wise RMS energy of the audio file.

    Args:
        audio_path: Path of the audio file, loaded with ``librosa.load``
            using its default arguments.

    Returns:
        The mean of the RMS values computed by ``librosa.feature.rms``.
    """
    y, _sr = librosa.load(audio_path)
    # feature.rms is indexed with [0] to take its first row of frame values.
    rms = librosa.feature.rms(y=y)[0]
    return np.mean(rms)


def detect_noise(audio_path):
    """Classify the audio file as noisy ("ồn ào") or quiet ("yên tĩnh").

    A clip whose mean RMS exceeds ``LOUD_RMS_THRESHOLD`` is noisy outright.
    Between ``MODERATE_RMS_THRESHOLD`` and ``LOUD_RMS_THRESHOLD`` it is noisy
    only if the model reports one of ``NOISY_EVENT_LABELS`` with a score above
    ``EVENT_SCORE_THRESHOLD``. Everything else is quiet.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        "ồn ào", "ồn ào (<event label>)" or "yên tĩnh".
    """
    rms_energy = calculate_rms_energy(audio_path)

    if rms_energy > LOUD_RMS_THRESHOLD:
        return "ồn ào"

    if rms_energy > MODERATE_RMS_THRESHOLD:
        # Inference is only needed in this band; the other branches never
        # read the model output, so they skip the model call entirely.
        res = model.generate(
            input=audio_path,
            language="auto",
            audio_event_detection=True,
        )
        # NOTE(review): assumes the first result may carry an
        # "audio_event_detection" mapping of label -> score; confirm against
        # the FunASR output format. A missing or empty value means no events.
        first_result = res[0] if res else {}
        audio_events = first_result.get("audio_event_detection") or {}
        for event_label, event_score in audio_events.items():
            if (
                event_score > EVENT_SCORE_THRESHOLD
                and event_label in NOISY_EVENT_LABELS
            ):
                return f"ồn ào ({event_label})"

    return "yên tĩnh"


# API endpoint that receives an audio file from the Flutter client.
@app.post("/detect-noise/")
async def detect_noise_api(file: UploadFile = File(...)):
    """Stage the uploaded file on disk, classify it and return the label.

    Args:
        file: The uploaded audio file.

    Returns:
        A dict of the form ``{"noise_level": <label>}``.
    """
    # The client-supplied filename is untrusted: never use it as a path.
    # Only its extension is kept, in case the audio decoder relies on it.
    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
    # mkstemp yields a unique name inside TEMP_DIR, so concurrent uploads
    # cannot overwrite each other and nothing is written outside TEMP_DIR.
    fd, file_path = tempfile.mkstemp(suffix=suffix, dir=TEMP_DIR)
    try:
        with os.fdopen(fd, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        result = detect_noise(file_path)
    finally:
        # Remove the staged upload whether or not analysis succeeded.
        os.remove(file_path)

    return {"noise_level": result}


# Run the FastAPI app (the original comment targets Hugging Face Spaces).
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)