"""Gradio app that reads the text of an uploaded PDF aloud.

The text of every page is extracted with PyPDF2, synthesized with a
Hugging Face text-to-speech pipeline, and written to a WAV file that the
Gradio audio component plays back.
"""

import os
import wave

import gradio as gr
import numpy as np
import PyPDF2
from transformers import pipeline

# File the synthesized speech is written to (overwritten on every request).
OUTPUT_AUDIO_PATH = "output_audio.wav"


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of all pages of a PDF.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts (a path or a binary
            file-like object).

    Returns:
        The text of all pages joined together; an empty string when no page
        yields any text.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # Extract each page exactly once; text extraction is the expensive step.
    page_texts = (page.extract_text() for page in reader.pages if page)
    return "".join(text for text in page_texts if text)


# Load text-to-speech pipeline from Hugging Face.
# NOTE(review): confirm this checkpoint can be loaded by the transformers
# "text-to-speech" pipeline in the installed transformers version.
tts = pipeline("text-to-speech", model="facebook/fastspeech2-en-ljspeech")


def _write_wav(path, samples, sampling_rate):
    """Write audio samples to ``path`` as a 16-bit PCM WAV file.

    Args:
        path: Destination file path.
        samples: Array-like of shape ``(length,)`` or ``(channels, length)``.
            Floating-point samples are treated as being in ``[-1.0, 1.0]``.
        sampling_rate: Sample rate in Hz.

    Raises:
        ValueError: If ``samples`` has more than two non-trivial dimensions.
    """
    data = np.atleast_1d(np.squeeze(np.asarray(samples)))
    if data.ndim == 1:
        data = data[np.newaxis, :]
    elif data.ndim != 2:
        raise ValueError(f"Unsupported audio shape: {data.shape}")

    if np.issubdtype(data.dtype, np.floating):
        # Clip before scaling so out-of-range samples saturate, not wrap.
        pcm = (np.clip(data, -1.0, 1.0) * 32767).astype("<i2")
    else:
        pcm = data.astype("<i2")

    # WAV frames interleave channels, i.e. (length, channels) in C order.
    frames = np.ascontiguousarray(pcm.T)
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(pcm.shape[0])
        wav_file.setsampwidth(2)
        wav_file.setframerate(int(sampling_rate))
        wav_file.writeframes(frames.tobytes())


def pdf_to_audio(pdf_file):
    """Convert an uploaded PDF to speech.

    The whole extracted text is passed to the pipeline in a single call; any
    input-length limit of the underlying model is not handled here.

    Args:
        pdf_file: The value delivered by the Gradio file input: a path, or a
            file-like object (a ``name`` attribute is used as the path when
            present). ``None`` when nothing was uploaded.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is ``None``
        when no audio was produced.
    """
    if pdf_file is None:
        return None, "No PDF file provided"

    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_source = pdf_file
    else:
        # Tempfile-style upload objects expose the on-disk path as ``name``;
        # plain streams are handed to PyPDF2 unchanged.
        pdf_source = getattr(pdf_file, "name", pdf_file)

    text = extract_text_from_pdf(pdf_source)
    if not text.strip():
        # None (rather than "") tells the audio output there is no file.
        return None, "No text found in PDF"

    speech = tts(text)
    # The pipeline returns raw samples plus their rate; they need a WAV
    # header to be playable, so do not dump the array bytes directly.
    _write_wav(OUTPUT_AUDIO_PATH, speech["audio"], speech["sampling_rate"])
    return OUTPUT_AUDIO_PATH, "Audio generated successfully"


# Gradio interface. The file input uses the installed Gradio version's
# default ``type``; pdf_to_audio accepts both paths and file objects.
interface = gr.Interface(
    fn=pdf_to_audio,
    inputs=gr.File(),
    outputs=[gr.Audio(type="filepath"), gr.Text()],
)

if __name__ == "__main__":
    interface.launch()