Spaces:
Running
Running
File size: 4,199 Bytes
f08a3f5 c634737 191aa05 7fedab7 78c4fc5 f08a3f5 c634737 f08a3f5 7fedab7 f08a3f5 c634737 3290c15 c634737 f08a3f5 191aa05 f08a3f5 cb49480 f08a3f5 534c98c f08a3f5 c0ed77c f08a3f5 7fedab7 c0ed77c f08a3f5 3a5956d f08a3f5 c0ed77c f08a3f5 c0ed77c f08a3f5 92f4908 f08a3f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# app.py
import gradio as gr
from extract_text_from_pdf import PDFTextExtractor
from generate_transcript import TranscriptProcessor
from generate_audio import TTSGenerator
import pickle
import os
import tempfile
import shutil
import spaces
def create_temp_session_directory() -> str:
    """Create a fresh temporary directory for one conversion session.

    Returns:
        The path of the newly created directory, as produced by
        ``tempfile.mkdtemp``. Nothing removes it automatically; whoever
        owns the session is responsible for deleting it.
    """
    return tempfile.mkdtemp()
# Combined function to perform all steps sequentially
@spaces.GPU
def process_pdf_to_podcast(pdf_file):
    """Run steps 1-3: extract the PDF text, draft a transcript, rewrite it for TTS.

    Every intermediate artifact is written into a new per-session temporary
    directory whose path is returned so that the audio step can reuse it.

    Args:
        pdf_file: The uploaded PDF; it is handed to ``shutil.copy`` as the
            source, so it must be a filesystem path. Falsy when the user has
            not uploaded anything.

    Returns:
        A 5-tuple ``(status, text_preview, transcript_preview,
        tts_ready_preview, session_dir)`` where ``text_preview`` is the first
        500 characters of the cleaned text, the two transcript previews are
        whatever objects were unpickled from the files reported by the
        processor, and ``session_dir`` is the session's working directory.

    Raises:
        gr.Error: If no PDF was uploaded.
    """
    if not pdf_file:
        # Fail with a readable UI message instead of the bare TypeError that
        # shutil.copy(None, ...) would raise.
        raise gr.Error("Please upload a PDF before running steps 1-3.")

    session_dir = create_temp_session_directory()
    try:
        # Define paths within the session directory
        pdf_path = os.path.join(session_dir, "uploaded_pdf.pdf")
        clean_text_path = os.path.join(session_dir, "clean_text.txt")
        transcript_path = os.path.join(session_dir, "data.pkl")
        tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
        text_model = "llama3-70b-8192"

        # Step 1: Extract Text from PDF
        shutil.copy(pdf_file, pdf_path)
        extractor = PDFTextExtractor(pdf_path, clean_text_path)
        clean_text_path = extractor.clean_and_save_text()

        # Display a preview of extracted text
        with open(clean_text_path, "r", encoding="utf-8") as file:
            text_preview = file.read(500)

        # Step 2: Generate Transcript
        processor = TranscriptProcessor(
            clean_text_path, transcript_path, tts_ready_path, text_model
        )
        transcript_path = processor.generate_transcript()

        # Load the generated transcript for preview.
        # NOTE(review): pickle.load is only acceptable here because the file
        # is presumably written by TranscriptProcessor inside this session
        # directory; never point it at user-supplied data.
        with open(transcript_path, "rb") as f:
            transcript_preview = pickle.load(f)

        # Step 3: Rewrite Transcript for TTS
        tts_ready_path = processor.rewrite_transcript()

        # Load the rewritten transcript for preview and editing
        with open(tts_ready_path, "rb") as f:
            tts_ready_preview = pickle.load(f)
    except Exception:
        # The session directory is only useful when all three steps succeed;
        # remove it on failure so aborted runs do not pile up on disk.
        shutil.rmtree(session_dir, ignore_errors=True)
        raise

    return (
        "Steps 1-3 completed. Preview and adjust the rewritten transcript if needed.",
        text_preview,
        transcript_preview,
        tts_ready_preview,
        session_dir,
    )
# Final Step: Generate Audio after optional adjustments
@spaces.GPU
def generate_audio_from_modified_text(tts_ready_text, session_dir):
    """Run step 4: persist the (possibly edited) transcript and synthesise audio.

    Args:
        tts_ready_text: The TTS-ready transcript as currently shown in the
            editable textbox; it is pickled as-is for the TTS generator.
        session_dir: Working directory returned by the steps 1-3 run. It is
            ``None`` when those steps have not been executed in this session.

    Returns:
        A 2-tuple ``(status, audio_path)`` where ``audio_path`` is whatever
        ``TTSGenerator.generate_audio`` reports.

    Raises:
        gr.Error: If there is no usable session directory.
    """
    if not session_dir or not os.path.isdir(session_dir):
        # Without this guard os.path.join(None, ...) fails with an opaque
        # TypeError when the button is pressed before steps 1-3 have run.
        raise gr.Error("Run steps 1-3 first so there is a transcript to convert.")

    tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
    audio_output_path = os.path.join(session_dir, "final_podcast_audio.mp3")

    # Save any modified TTS-ready transcript
    with open(tts_ready_path, "wb") as f:
        pickle.dump(tts_ready_text, f)

    # Generate audio from the TTS-ready transcript
    tts_gen = TTSGenerator(tts_ready_path, audio_output_path)
    audio_path = tts_gen.generate_audio()
    return f"Step 4 complete. Audio saved to {audio_path}.", audio_path
# Gradio Interface
# Layout: an upload/trigger/status row, three text previews (the last one
# editable), then the audio trigger and player. Event listeners are registered
# inside the Blocks context; the app is launched once the context is closed.
with gr.Blocks() as app:
    gr.Markdown("# PDF to Podcast Conversion Application")

    # Single-click initiation of Steps 1-3
    # NOTE(review): the row is assumed to hold the upload, the trigger button
    # and the status box — confirm against the intended layout.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        run_all_button = gr.Button("Run All Steps (1-3)")
        output_status = gr.Textbox(label="Status")

    # Step 1 Preview of Extracted Text
    extracted_text_preview = gr.Textbox(
        label="Extracted Text Preview (First 500 Characters)",
        interactive=False,
    )

    # Step 2 Preview of Generated Transcript
    transcript_preview = gr.Textbox(
        label="Generated Transcript Preview",
        interactive=False,
    )

    # Step 3 Editable Rewritten Transcript for TTS
    tts_ready_preview = gr.Textbox(
        label="Editable Rewritten Transcript for TTS",
        interactive=True,
    )

    # Button for generating audio with editable transcript
    generate_audio_button = gr.Button("Generate Audio from Edited Transcript")
    final_audio_output = gr.Audio(label="Generated Podcast Audio")

    # Per-session slot carrying the working directory from steps 1-3 to step 4
    session_dir = gr.State()

    # Step 1-3 execution
    run_all_button.click(
        process_pdf_to_podcast,
        inputs=pdf_input,
        outputs=[
            output_status,
            extracted_text_preview,
            transcript_preview,
            tts_ready_preview,
            session_dir,
        ],
    )

    # Final step: Generate Audio from modified TTS-ready transcript
    generate_audio_button.click(
        generate_audio_from_modified_text,
        inputs=[tts_ready_preview, session_dir],
        outputs=[output_status, final_audio_output],
    )

app.launch()
|