File size: 3,576 Bytes
f08a3f5
 
 
 
 
 
 
 
8284962
f08a3f5
 
 
 
 
 
 
 
 
 
8284962
f08a3f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8284962
f08a3f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# app.py

import gradio as gr
from extract_text_from_pdf import PDFTextExtractor
from generate_transcript import TranscriptProcessor
from generate_audio import TTSGenerator
import pickle
import os
import spaces

# Define paths
# Working-file locations for the pipeline. The './resources/...' entries are
# relative paths, so they resolve against the process's current working
# directory.

# Fixed location process_pdf_to_podcast writes the uploaded PDF to before
# handing it to PDFTextExtractor.
pdf_path = './resources/uploaded_pdf.pdf'
# NOTE(review): process_pdf_to_podcast works with the paths returned by the
# extractor / transcript processor and never reads the two values below;
# presumably they mirror those classes' default output locations -- confirm.
clean_text_path = './resources/clean_text.txt'
transcript_path = './resources/data.pkl'
# File generate_audio_from_modified_text pickles the (possibly edited)
# transcript into before constructing TTSGenerator with the same path.
tts_ready_path = './resources/podcast_ready_data.pkl'
# NOTE(review): not referenced anywhere in the visible part of this file; the
# audio path actually shown to the user comes from TTSGenerator.generate_audio().
audio_output_path = 'final_podcast_audio.mp3'


# Helper: normalise whatever gr.File delivers into raw PDF bytes
def _read_uploaded_pdf(pdf_file):
    """Return the raw bytes of an uploaded file.

    Gradio hands uploads to the callback in different shapes depending on its
    version and the File component's ``type`` setting: a path string, an
    object exposing the temp-file path as ``.name``, raw bytes, or a readable
    file object. All four are accepted here.

    Raises:
        gr.Error: if none of the supported shapes matches.
    """
    if isinstance(pdf_file, (bytes, bytearray)):
        return bytes(pdf_file)

    # The on-disk path is preferred over ``.read()``: path strings have no
    # ``read`` method, and a wrapper's own handle is not guaranteed to be
    # positioned at (or even to contain) the uploaded data.
    if isinstance(pdf_file, (str, os.PathLike)):
        source_path = pdf_file
    else:
        source_path = getattr(pdf_file, 'name', None)

    if isinstance(source_path, (str, os.PathLike)) and os.path.isfile(source_path):
        with open(source_path, 'rb') as src:
            return src.read()

    if hasattr(pdf_file, 'read'):
        return pdf_file.read()

    raise gr.Error("Could not read the uploaded file. Please upload the PDF again.")


# Combined function to perform all steps sequentially
@spaces.GPU
def process_pdf_to_podcast(pdf_file):
    """Run steps 1-3 of the pipeline on an uploaded PDF.

    The upload is copied to ``pdf_path``, its text is extracted and cleaned,
    a transcript is generated from that text, and the transcript is rewritten
    for TTS. Nothing is synthesised here; audio generation is a separate step.

    Args:
        pdf_file: The value Gradio passes for the ``gr.File`` input (see
            ``_read_uploaded_pdf`` for the accepted shapes).

    Returns:
        A 4-tuple ``(status_message, text_preview, transcript_preview,
        tts_ready_preview)``: a status string, the first 500 characters of the
        cleaned text, and the unpickled contents of the generated and the
        rewritten transcript files.

    Raises:
        gr.Error: if no file was uploaded or the upload cannot be read.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file before running the pipeline.")

    # Step 1: Extract Text from PDF
    # Make sure the target directory exists so the copy below cannot fail on
    # a fresh checkout / container.
    resources_dir = os.path.dirname(pdf_path)
    if resources_dir:
        os.makedirs(resources_dir, exist_ok=True)

    pdf_bytes = _read_uploaded_pdf(pdf_file)
    with open(pdf_path, 'wb') as f:
        f.write(pdf_bytes)

    extractor = PDFTextExtractor(pdf_path)
    # Distinct local names: the module-level path constants are left unshadowed.
    extracted_text_path = extractor.clean_and_save_text()

    # Display a preview of extracted text
    with open(extracted_text_path, 'r', encoding='utf-8') as file:
        text_preview = file.read(500)

    # Step 2: Generate Transcript
    processor = TranscriptProcessor(extracted_text_path)
    generated_transcript_path = processor.generate_transcript()

    # Load the generated transcript for preview.
    # SECURITY NOTE: pickle.load executes arbitrary code from the file. These
    # files are written by this pipeline itself; never point this at
    # user-supplied pickles.
    with open(generated_transcript_path, 'rb') as f:
        transcript_preview = pickle.load(f)

    # Step 3: Rewrite Transcript for TTS
    rewritten_transcript_path = processor.rewrite_transcript()

    # Load the rewritten transcript for preview and editing
    with open(rewritten_transcript_path, 'rb') as f:
        tts_ready_preview = pickle.load(f)

    return (
        "Steps 1-3 completed. Preview and adjust the rewritten transcript if needed.",
        text_preview,
        transcript_preview,
        tts_ready_preview,
    )


# Final Step: Generate Audio after optional adjustments
@spaces.GPU
def generate_audio_from_modified_text(tts_ready_text):
    """Persist the (possibly edited) transcript and synthesise audio from it.

    The transcript is pickled to ``tts_ready_path`` and a ``TTSGenerator``
    built on that same path produces the audio.

    Args:
        tts_ready_text: Contents of the editable transcript textbox.

    Returns:
        A 2-tuple ``(status_message, audio_path)`` where ``audio_path`` is
        whatever ``TTSGenerator.generate_audio()`` returns.

    Raises:
        gr.Error: if the transcript is missing or blank.
    """
    # Reject an empty transcript up front instead of spending a GPU call on it.
    if tts_ready_text is None or (
        isinstance(tts_ready_text, str) and not tts_ready_text.strip()
    ):
        raise gr.Error(
            "The transcript is empty. Run steps 1-3 or enter a transcript first."
        )

    # The audio button can be pressed before steps 1-3 have ever run, so the
    # output directory may not exist yet.
    target_dir = os.path.dirname(tts_ready_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Save any modified TTS-ready transcript
    # NOTE(review): the textbox delivers a str; the format TTSGenerator expects
    # inside this pickle is not visible from here -- confirm they agree.
    with open(tts_ready_path, 'wb') as f:
        pickle.dump(tts_ready_text, f)

    # Generate audio from the TTS-ready transcript
    tts_gen = TTSGenerator(tts_ready_path)
    audio_path = tts_gen.generate_audio()

    return f"Step 4 complete. Audio saved to {audio_path}.", audio_path


# Gradio Interface
# Layout: one control row (upload, trigger, status), three stacked text
# panels for the intermediate results, then the audio trigger and player.
with gr.Blocks() as app:
    gr.Markdown("# PDF to Podcast Conversion Application")

    # Control row: upload widget, trigger for steps 1-3, shared status line
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        run_all_button = gr.Button("Run All Steps (1-3)")
        output_status = gr.Textbox(label="Status")

    # Read-only panel: start of the cleaned text (step 1)
    extracted_text_preview = gr.Textbox(
        label="Extracted Text Preview (First 500 Characters)",
        interactive=False,
    )

    # Read-only panel: generated transcript (step 2)
    transcript_preview = gr.Textbox(
        label="Generated Transcript Preview",
        interactive=False,
    )

    # Editable panel: TTS-ready rewrite (step 3); its content feeds step 4
    tts_ready_preview = gr.Textbox(
        label="Editable Rewritten Transcript for TTS",
        interactive=True,
    )

    # Step 4 trigger and the player for the resulting audio
    generate_audio_button = gr.Button("Generate Audio from Edited Transcript")
    final_audio_output = gr.Audio(label="Generated Podcast Audio")

    # Wire steps 1-3: the four return values fill the status line and the
    # three text panels, in that order
    run_all_button.click(
        fn=process_pdf_to_podcast,
        inputs=[pdf_input],
        outputs=[
            output_status,
            extracted_text_preview,
            transcript_preview,
            tts_ready_preview,
        ],
    )

    # Wire step 4: the edited transcript goes in, status text and audio come out
    generate_audio_button.click(
        fn=generate_audio_from_modified_text,
        inputs=[tts_ready_preview],
        outputs=[output_status, final_audio_output],
    )

app.launch()