"""Speech and document helpers for the API layer.

Provides speech-to-text (Google Speech Recognition via ``speech_recognition``),
text-to-speech (``gTTS``) and PDF text extraction (``PyPDF2``). Every failure is
logged and surfaced to the caller as a ``fastapi.HTTPException``.
"""

# NOTE(review): ``base64``, ``Optional`` and ``AudioSegment`` are not used by the
# functions in this module; they are kept because other code may rely on them
# being importable from here -- confirm before removing.
import base64
import io
from typing import Optional

import speech_recognition as sr
from fastapi import HTTPException
from gtts import gTTS
from pydub import AudioSegment

from config import logger
from utils import clean_text_response  # used by extract_text_from_pdf


def recognize_speech(audio_data: bytes, language: str = "en-US") -> str:
    """Transcribe an in-memory audio file with Google Speech Recognition.

    Args:
        audio_data: Raw bytes of an audio file. It must be in a format that
            ``sr.AudioFile`` can open (WAV/AIFF/FLAC according to the
            SpeechRecognition documentation).
        language: Language tag forwarded to ``recognize_google``.

    Returns:
        The transcription returned by ``recognize_google``.

    Raises:
        HTTPException: 400 if no audio was supplied or the speech could not
            be understood, 503 if the recognition service request failed,
            500 for any other processing error.
    """
    # An empty payload is a client error; without this guard it fails inside
    # sr.AudioFile and is misreported as a 500.
    if not audio_data:
        logger.error("Empty audio payload received for speech recognition")
        raise HTTPException(status_code=400, detail="No audio data provided")

    recognizer = sr.Recognizer()
    try:
        with io.BytesIO(audio_data) as audio_file, sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
        # The recorded audio is fully in memory, so the file objects are
        # released before the (potentially slow) network request.
        return recognizer.recognize_google(audio, language=language)
    except sr.UnknownValueError as e:
        logger.error("Google Speech Recognition could not understand audio")
        raise HTTPException(status_code=400, detail="Could not understand audio") from e
    except sr.RequestError as e:
        logger.error(f"Could not request results from Google Speech Recognition service; {e}")
        raise HTTPException(
            status_code=503, detail="Speech recognition service unavailable"
        ) from e
    except Exception as e:
        # Boundary handler: anything unexpected becomes a generic 500.
        logger.error(f"Error in speech recognition: {e}")
        raise HTTPException(status_code=500, detail="Error processing speech") from e


def text_to_speech(text: str, language: str = "en", slow: bool = False) -> bytes:
    """Synthesize speech for ``text`` with gTTS and return the audio bytes.

    Args:
        text: The text to speak. Must contain at least one non-whitespace
            character.
        language: Language code passed to ``gTTS`` as ``lang``.
        slow: Passed through to ``gTTS`` as ``slow``.

    Returns:
        The bytes written by ``gTTS.write_to_fp`` (MP3 data per the gTTS
        documentation).

    Raises:
        HTTPException: 400 if ``text`` is empty or whitespace-only, 500 if
            speech generation fails for any other reason.
    """
    # gTTS rejects empty input with an assertion, which previously surfaced
    # as a misleading 500; report it as the client error it is.
    if not text or not text.strip():
        logger.error("Empty text received for text-to-speech conversion")
        raise HTTPException(status_code=400, detail="No text provided for speech generation")

    try:
        tts = gTTS(text=text, lang=language, slow=slow)
        with io.BytesIO() as mp3_fp:
            tts.write_to_fp(mp3_fp)
            # getvalue() returns the whole buffer regardless of the current
            # stream position, so no seek(0) is needed.
            return mp3_fp.getvalue()
    except Exception as e:
        logger.error(f"Error in text-to-speech conversion: {e}")
        raise HTTPException(status_code=500, detail="Error generating speech") from e


def extract_text_from_pdf(pdf_data: bytes) -> str:
    """Extract the text of every page of a PDF and clean it.

    Args:
        pdf_data: Raw bytes of the PDF document.

    Returns:
        The concatenated text of all pages, passed through
        ``clean_text_response``.

    Raises:
        HTTPException: 400 if the document cannot be read or processed.
    """
    try:
        # Imported lazily, as in the original, so the module loads even when
        # PyPDF2 is only needed for this one function.
        from PyPDF2 import PdfReader

        pdf_reader = PdfReader(io.BytesIO(pdf_data))
        # extract_text() may yield nothing for a page; treat that as "".
        # join() avoids the quadratic cost of repeated string concatenation.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
        return clean_text_response(text)
    except Exception as e:
        logger.error(f"Error extracting text from PDF: {e}")
        raise HTTPException(status_code=400, detail="Failed to extract text from PDF") from e