Spaces:
Running
Running
File size: 4,578 Bytes
f08a3f5 bc135da f08a3f5 c634737 191aa05 fce33e5 9d002d8 78c4fc5 f08a3f5 c634737 f08a3f5 80a6d9f f08a3f5 c634737 3290c15 c634737 f08a3f5 191aa05 f08a3f5 cb49480 f08a3f5 534c98c f08a3f5 c0ed77c f08a3f5 bc135da c0ed77c bc135da c0ed77c f08a3f5 bc135da 9d002d8 f08a3f5 9d002d8 f08a3f5 c0ed77c f08a3f5 c0ed77c f08a3f5 92f4908 f08a3f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# app.py
import gradio as gr
from extract_text_from_pdf import PDFTextExtractor
from generate_transcript import TranscriptProcessor
#from generate_audio import TTSGenerator
from generate_audio_edgetts import EdgeTTSGenerator
import pickle
import os
import tempfile
import shutil
import spaces
import asyncio
def create_temp_session_directory():
    """Create a new temporary directory for one session and return its path."""
    session_path = tempfile.mkdtemp()
    return session_path
# Combined function to perform all steps sequentially
@spaces.GPU(duration=120)
def process_pdf_to_podcast(pdf_file):
    """Run steps 1-3: extract PDF text, generate a transcript, rewrite it for TTS.

    Args:
        pdf_file: Path of the uploaded PDF. It is copied into a freshly
            created session directory before any processing happens.

    Returns:
        A 5-tuple ``(status_message, text_preview, transcript_preview,
        tts_ready_preview, session_dir)``. ``text_preview`` is the first 500
        characters of the cleaned text; the two transcript previews are the
        objects unpickled from the files written by ``TranscriptProcessor``;
        ``session_dir`` is the directory holding all intermediate files.

    Raises:
        ValueError: If no PDF was supplied.
    """
    # Fail early with a clear message instead of letting shutil.copy raise an
    # opaque TypeError after a session directory has already been created.
    if not pdf_file:
        raise ValueError("No PDF file was provided; upload a PDF before running steps 1-3.")

    session_dir = create_temp_session_directory()

    # Define paths within the session directory
    pdf_path = os.path.join(session_dir, "uploaded_pdf.pdf")
    clean_text_path = os.path.join(session_dir, "clean_text.txt")
    transcript_path = os.path.join(session_dir, "data.pkl")
    tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
    text_model = "llama3-70b-8192"

    # Step 1: Extract Text from PDF
    shutil.copy(pdf_file, pdf_path)
    extractor = PDFTextExtractor(pdf_path, clean_text_path)
    clean_text_path = extractor.clean_and_save_text()

    # Display a preview of extracted text
    with open(clean_text_path, 'r', encoding='utf-8') as file:
        text_preview = file.read(500)

    # Step 2: Generate Transcript
    processor = TranscriptProcessor(clean_text_path, transcript_path, tts_ready_path, text_model)
    transcript_path = processor.generate_transcript()

    # Load the generated transcript for preview.
    # The pickle files read here are produced by this pipeline inside the
    # session directory; they are not user-supplied data.
    with open(transcript_path, 'rb') as f:
        transcript_preview = pickle.load(f)

    # Step 3: Rewrite Transcript for TTS
    tts_ready_path = processor.rewrite_transcript()

    # Load the rewritten transcript for preview and editing
    with open(tts_ready_path, 'rb') as f:
        tts_ready_preview = pickle.load(f)

    return (
        "Steps 1-3 completed. Preview and adjust the rewritten transcript if needed.",
        text_preview,
        transcript_preview,
        tts_ready_preview,
        session_dir,
    )
# Final Step: Generate Audio after optional adjustments
#@spaces.GPU(duration=300)
def generate_audio_from_modified_text(tts_ready_text, session_dir):
    """Step 4: save the (possibly edited) TTS transcript and synthesize audio.

    Args:
        tts_ready_text: Transcript content to pickle for the TTS generator.
        session_dir: Session directory from steps 1-3. When falsy, a new
            temporary directory is created so this step can run on its own.

    Returns:
        A 2-tuple ``(status_message, audio_path)`` where ``audio_path`` is the
        value produced by ``EdgeTTSGenerator.generate_audio()``.
    """
    if not session_dir:
        session_dir = create_temp_session_directory()

    tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
    audio_output_path = os.path.join(session_dir, "final_podcast_audio.mp3")

    # Save any modified TTS-ready transcript
    with open(tts_ready_path, 'wb') as f:
        pickle.dump(tts_ready_text, f)

    # Generate audio from the TTS-ready transcript.
    # generate_audio() is driven through asyncio.run, i.e. it is used as a
    # coroutine. It must be called on the generator instance created here;
    # the previous code referenced an undefined name `generator`.
    tts_gen = EdgeTTSGenerator(tts_ready_path, audio_output_path)
    audio_path = asyncio.run(tts_gen.generate_audio())
    return f"Step 4 complete. Audio saved to {audio_path}.", audio_path
# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown("# PDF to Podcast Conversion Application")

    # Upload widget and the button that kicks off steps 1-3.
    # NOTE(review): which components sit inside this row is inferred; the
    # original indentation is not visible here, so confirm the layout.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        run_all_button = gr.Button("Run All Steps (1-3)")

    # Shared status line, written by both click handlers below.
    output_status = gr.Textbox(label="Status")

    # Read-only previews for the outputs of steps 1 and 2.
    extracted_text_preview = gr.Textbox(
        label="Extracted Text Preview (First 500 Characters)",
        interactive=False,
    )
    transcript_preview = gr.Textbox(
        label="Generated Transcript Preview",
        interactive=False,
    )

    # Step 3 output stays editable so it can be adjusted before synthesis.
    tts_ready_preview = gr.Textbox(
        label="Editable Rewritten Transcript for TTS",
        interactive=True,
    )

    # Audio generation trigger and player.
    generate_audio_button = gr.Button("Generate Audio from Edited Transcript")
    final_audio_output = gr.Audio(label="Generated Podcast Audio")

    # Carries the session directory from steps 1-3 to the audio step.
    session_dir = gr.State()

    # Steps 1-3: fill the status line, the three previews and the session state.
    run_all_button.click(
        fn=process_pdf_to_podcast,
        inputs=[pdf_input],
        outputs=[
            output_status,
            extracted_text_preview,
            transcript_preview,
            tts_ready_preview,
            session_dir,
        ],
    )

    # Step 4: synthesize audio from whatever is currently in the editable box.
    generate_audio_button.click(
        fn=generate_audio_from_modified_text,
        inputs=[tts_ready_preview, session_dir],
        outputs=[output_status, final_audio_output],
    )

app.launch()
|