File size: 3,112 Bytes
a4a2363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
"""
Created By: ishwor subedi
Date: 2024-07-31
"""
import os
import tempfile
from fastapi import Form
from fastapi import UploadFile, HTTPException, status
from src.models.apis_models import TextToSpeechRequest
from fastapi.routing import APIRouter
from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline
from src import logging as logger
from src.utils.error_handling import create_success_response, raise_http_exception

# Router that the speech endpoints in this module register on; they are
# grouped under the "SpeechTranscription" tag.
speech_translator_router = APIRouter(tags=["SpeechTranscription"])
# Single pipeline instance created at import time and shared by the
# endpoint handlers defined below.
pipeline = SpeechTranscriptionPipeline()


@speech_translator_router.post("/text_to_speech")
async def text_to_speech(request: TextToSpeechRequest):
    """Synthesize speech for the text carried by the request.

    Forwards ``request.text``, ``request.lang`` and ``request.tld`` to
    ``pipeline.text_to_speech`` and wraps the result in a success response
    under the ``"audio"`` key.

    A ``ValueError`` (including the one raised here when the pipeline returns
    a falsy result) is reported through ``raise_http_exception`` with code
    400; any other exception is reported with code 500.
    """
    logger.info(">>>text_to_speech API Triggered <<<")
    try:
        text, lang, tld = request.text, request.lang, request.tld
        synthesized = pipeline.text_to_speech(text, lang, tld)
        # A falsy result is treated as a failed generation.
        if not synthesized:
            raise ValueError("Audio generation failed.")
        payload = create_success_response(code=200, data={"audio": synthesized})
        logger.info(">>>text_to_speech API success <<<")
        return payload
    except ValueError as invalid:
        logger.info(f">>>text_to_speech API failed {invalid}<<<")
        raise_http_exception(code=400, message="Text to speech failed")
    except Exception as unexpected:
        logger.error(f">>> Error processing text-to-speech {unexpected}<<<")
        raise_http_exception(code=500, message="Internal server error")


@speech_translator_router.post(
    "/speech_to_text",
)
async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
    """Transcribe an uploaded audio file.

    The upload is read into memory and written to a temporary ``.wav`` file,
    whose path is passed to ``pipeline.speech_to_text`` together with
    ``lang``. The temporary file is removed before the request finishes,
    whether the write or the transcription succeeded or failed.

    Args:
        audio: Uploaded audio file; must not be empty.
        lang: Language value from the form, forwarded unchanged to the
            pipeline.

    Returns:
        The result of ``create_success_response`` with code 200 and
        ``{"transcript": ...}`` as data.

    Raises:
        HTTPException: 400 when the upload cannot be read or is empty;
            404 when the pipeline raises ``FileNotFoundError``.

    Other failures are reported through ``raise_http_exception`` with code
    500 (that helper is defined elsewhere; it is assumed to raise).
    """
    logger.info(">>>speech_to_text API Triggered <<<")
    try:
        audio_bytes = await audio.read()
        if not audio_bytes:
            logger.error(">>> Empty audio file <<<")
            raise ValueError("Empty audio file")
    except Exception as e:
        logger.error(f">>> Invalid audio file {e}<<<")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid audio file"
        )

    temp_audio_file_path = None
    # One try/finally spans both the write and the transcription so the
    # temporary file (created with delete=False) is removed on every path.
    try:
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
                # Record the path before writing: if the write fails the file
                # already exists on disk and still has to be cleaned up.
                temp_audio_file_path = temp_audio_file.name
                temp_audio_file.write(audio_bytes)
        except Exception as e:
            logger.error(f">>> Error creating temporary file{e} <<<")
            raise_http_exception(code=500, message="Internal server error")

        try:
            transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
            response = create_success_response(code=200, data={"transcript": transcript})
            logger.info(">>>speech_to_text API success <<<")

            return response

        except FileNotFoundError:
            logger.error(">>> Temporary file not found <<<")
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Temporary file not found"
            )
        except Exception as e:
            logger.error(f">>> Error processing speech-to-text {e}<<<")
            raise_http_exception(code=500, message="Error processing speech-to-text")

    finally:
        if temp_audio_file_path is not None:
            # Best-effort cleanup: a failed removal must not replace the
            # response (or the error) produced above.
            try:
                os.remove(temp_audio_file_path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass
            except OSError as e:
                logger.error(f">>> Error removing temporary file {e} <<<")