Spaces:
Running
on
A10G
Running
on
A10G
from typing import Optional | |
from fastapi import HTTPException | |
from config import logger | |
import io | |
import speech_recognition as sr | |
from gtts import gTTS | |
from pydub import AudioSegment | |
import base64 | |
def recognize_speech(audio_data: bytes, language: str = "en-US") -> str:
    """Transcribe audio bytes to text via Google Speech Recognition.

    Args:
        audio_data: Contents of an audio file; wrapped in an in-memory
            buffer and opened with ``sr.AudioFile``.
        language: Language tag forwarded to ``recognize_google``.

    Returns:
        The transcript produced by ``recognize_google``.

    Raises:
        HTTPException: 400 if the audio could not be understood, 503 if the
            request to the recognition service failed, 500 for any other
            error raised while reading or transcribing the audio.
    """
    engine = sr.Recognizer()
    try:
        # Read the whole clip into memory first; the buffer and audio file
        # are both closed before the transcription call is made.
        with io.BytesIO(audio_data) as buffer, sr.AudioFile(buffer) as clip:
            captured = engine.record(clip)
        return engine.recognize_google(captured, language=language)
    except sr.UnknownValueError:
        logger.error("Google Speech Recognition could not understand audio")
        raise HTTPException(status_code=400, detail="Could not understand audio")
    except sr.RequestError as err:
        logger.error(f"Could not request results from Google Speech Recognition service; {err}")
        raise HTTPException(status_code=503, detail="Speech recognition service unavailable")
    except Exception as err:
        # Catch-all boundary: anything unexpected becomes a generic 500.
        logger.error(f"Error in speech recognition: {err}")
        raise HTTPException(status_code=500, detail="Error processing speech")
def text_to_speech(text: str, language: str = "en", slow: bool = False) -> bytes:
    """Synthesize speech for ``text`` with gTTS and return the audio bytes.

    Args:
        text: The text to speak.
        language: Language code passed to gTTS as ``lang``.
        slow: Passed through to gTTS as ``slow``.

    Returns:
        The bytes gTTS wrote to the in-memory buffer (presumably MP3 data,
        going by the original buffer name -- confirm against gTTS docs).

    Raises:
        HTTPException: 500 if synthesis fails for any reason.
    """
    try:
        synthesizer = gTTS(text=text, lang=language, slow=slow)
        audio_buffer = io.BytesIO()
        synthesizer.write_to_fp(audio_buffer)
        # getvalue() returns the full buffer regardless of stream position.
        return audio_buffer.getvalue()
    except Exception as err:
        logger.error(f"Error in text-to-speech conversion: {err}")
        raise HTTPException(status_code=500, detail="Error generating speech")
def extract_text_from_pdf(pdf_data: bytes) -> str:
    """Extract the text of every page of a PDF and return it cleaned.

    Args:
        pdf_data: Raw bytes of the PDF document.

    Returns:
        The concatenated page text after passing through
        ``clean_text_response``.

    Raises:
        HTTPException: 400 if anything fails while reading the PDF,
            extracting text, or cleaning it.
    """
    try:
        # NOTE(review): the import lives inside the try, so a missing PyPDF2
        # install is also reported to the client as a 400.
        from PyPDF2 import PdfReader

        reader = PdfReader(io.BytesIO(pdf_data))
        # A page may yield None/empty text; treat that as "". Joining once
        # avoids repeated string concatenation across pages.
        raw_text = "".join(page.extract_text() or "" for page in reader.pages)
        # NOTE(review): clean_text_response is not defined in the visible
        # part of this file -- presumably provided elsewhere; if it is
        # missing, the NameError is caught below and surfaces as a 400.
        return clean_text_response(raw_text)
    except Exception as e:
        logger.error(f"Error extracting text from PDF: {e}")
        raise HTTPException(status_code=400, detail="Failed to extract text from PDF")