|
from typing import Optional |
|
from fastapi import HTTPException |
|
from config import logger |
|
import io |
|
import speech_recognition as sr |
|
from gtts import gTTS |
|
from pydub import AudioSegment |
|
import base64 |
|
from utils import clean_text_response |
|
|
|
def recognize_speech(audio_data: bytes, language: str = "en-US") -> str:
    """Transcribe an in-memory audio clip with Google Speech Recognition.

    Args:
        audio_data: Raw bytes of an audio file. It must be in a container
            that ``sr.AudioFile`` can open (WAV, AIFF or FLAC according to
            the SpeechRecognition documentation).
        language: Language tag forwarded to ``recognize_google``.

    Returns:
        The transcript produced by the recognizer.

    Raises:
        HTTPException: 400 when no audio was supplied or the speech could
            not be understood, 503 when the recognition service request
            failed, 500 for any other failure.
    """
    if not audio_data:
        # An empty payload is a client mistake. Reject it up front instead
        # of letting the decoder fail and the catch-all report it as a 500.
        logger.error("No audio data provided for speech recognition")
        raise HTTPException(status_code=400, detail="No audio data provided")

    recognizer = sr.Recognizer()
    try:
        with io.BytesIO(audio_data) as audio_file:
            with sr.AudioFile(audio_file) as source:
                audio = recognizer.record(source)
        # The recorded clip is held by `audio` (record() returns a
        # self-contained object per the SpeechRecognition docs), so the
        # buffer is released before the network round trip below.
        return recognizer.recognize_google(audio, language=language)
    except sr.UnknownValueError as exc:
        logger.error("Google Speech Recognition could not understand audio")
        raise HTTPException(
            status_code=400, detail="Could not understand audio"
        ) from exc
    except sr.RequestError as exc:
        logger.error(
            f"Could not request results from Google Speech Recognition service; {exc}"
        )
        raise HTTPException(
            status_code=503, detail="Speech recognition service unavailable"
        ) from exc
    except Exception as exc:
        # Boundary handler: anything unexpected (e.g. undecodable audio)
        # becomes a generic 500 while the cause stays attached for debugging.
        logger.error(f"Error in speech recognition: {exc}")
        raise HTTPException(
            status_code=500, detail="Error processing speech"
        ) from exc
|
|
|
def text_to_speech(text: str, language: str = "en", slow: bool = False) -> bytes:
    """Synthesise speech for *text* with gTTS and return the MP3 bytes.

    Args:
        text: The text to speak. Must contain at least one non-whitespace
            character.
        language: Language code passed to gTTS as ``lang``.
        slow: Passed through to gTTS as ``slow``.

    Returns:
        The audio written by ``gTTS.write_to_fp`` as a ``bytes`` object.

    Raises:
        HTTPException: 400 when *text* is empty or whitespace-only, 500 when
            speech generation fails for any other reason.
    """
    if not text or not text.strip():
        # Nothing to synthesise is a client error. This check sits outside
        # the try block so the catch-all below cannot turn it into a 500.
        logger.error("No text provided for text-to-speech conversion")
        raise HTTPException(
            status_code=400, detail="No text provided for speech generation"
        )

    try:
        tts = gTTS(text=text, lang=language, slow=slow)
        # The context manager guarantees the in-memory buffer is closed even
        # if writing fails part-way through.
        with io.BytesIO() as mp3_fp:
            tts.write_to_fp(mp3_fp)
            # getvalue() returns the whole buffer regardless of the current
            # stream position, so no seek(0) is needed.
            return mp3_fp.getvalue()
    except Exception as exc:
        logger.error(f"Error in text-to-speech conversion: {exc}")
        raise HTTPException(
            status_code=500, detail="Error generating speech"
        ) from exc
|
|
|
def extract_text_from_pdf(pdf_data: bytes) -> str:
    """Extract the text of every page of a PDF and clean it up.

    Args:
        pdf_data: Raw bytes of the PDF document.

    Returns:
        The concatenated page text after being passed through
        ``clean_text_response``.

    Raises:
        HTTPException: 400 when anything fails while reading the PDF or
            cleaning the extracted text.
    """
    try:
        # Function-scope import: PyPDF2 is only loaded when a PDF is actually
        # processed. It stays inside the try so that callers only ever see
        # HTTPException from this function.
        from PyPDF2 import PdfReader

        reader = PdfReader(io.BytesIO(pdf_data))
        # A page may yield no text (None or ""); such pages contribute
        # nothing. join() builds the result in one pass instead of repeated
        # string concatenation.
        text = "".join(page.extract_text() or "" for page in reader.pages)
        return clean_text_response(text)
    except Exception as exc:
        logger.error(f"Error extracting text from PDF: {exc}")
        raise HTTPException(
            status_code=400, detail="Failed to extract text from PDF"
        ) from exc