"""FastAPI service: Kurmanji Kurdish speech-to-text, text translation, and text-to-speech."""

import io
import wave
from functools import lru_cache

import librosa
import numpy as np
from deep_translator import GoogleTranslator
from fastapi import FastAPI, File, Response, UploadFile
from transformers import pipeline

app = FastAPI()

print("Loading translator")
translator = GoogleTranslator(source='ku', target='fr')
print("Translator loaded")


@lru_cache(maxsize=1)
def _asr_pipeline():
    """Load the Kurmanji Kurdish ASR pipeline once; cached across requests.

    Loading a HF pipeline per request re-instantiates the full model each
    time, which is prohibitively slow — cache it instead.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="Akashpb13/xlsr_kurmanji_kurdish",
    )


@lru_cache(maxsize=1)
def _tts_pipeline():
    """Load the Kurmanji Kurdish TTS pipeline once; cached across requests."""
    return pipeline(
        "text-to-audio",
        model="roshna-omer/speecht5_tts_krd-kmr_CV17.0",
    )


def speech2text(audio_data: bytes) -> str:
    """Transcribe raw audio bytes to Kurmanji Kurdish text.

    The audio is decoded with librosa and resampled to 16 kHz, the rate
    the XLS-R model expects.
    """
    audio_array, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
    output = _asr_pipeline()(audio_array)
    return output["text"]


def text2speech(text: str) -> bytes:
    """Synthesize *text* to speech and return a complete WAV file as bytes.

    The TTS pipeline yields float samples in [-1, 1] plus a sampling rate;
    we encode them as mono 16-bit PCM WAV so the HTTP response body is a
    valid ``audio/wav`` payload (a raw float array is not).
    """
    output = _tts_pipeline()(text)
    samples = np.asarray(output["audio"]).squeeze()
    # Convert float samples to 16-bit signed PCM, clipping to the valid range.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wav:
        wav.setnchannels(1)
        wav.setsampwidth(2)  # 2 bytes = 16-bit samples
        wav.setframerate(int(output["sampling_rate"]))
        wav.writeframes(pcm.tobytes())
    return buf.getvalue()


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and translate the text to French."""
    audio_data = await file.read()
    text_output = speech2text(audio_data)
    translated = translator.translate(text_output)
    return {"text": text_output, "translation": translated}


@app.post("/transcribe_audio")
async def transcribe_and_return_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file, then speak the transcript back as WAV."""
    audio_data = await file.read()
    text_output = speech2text(audio_data)
    audio_output = text2speech(text_output)
    return Response(content=audio_output, media_type="audio/wav")