File size: 4,199 Bytes
f08a3f5
 
 
 
 
 
 
 
c634737
191aa05
7fedab7
78c4fc5
f08a3f5
c634737
 
 
 
f08a3f5
7fedab7
f08a3f5
c634737
 
 
 
 
 
 
 
 
3290c15
c634737
f08a3f5
191aa05
f08a3f5
cb49480
f08a3f5
 
 
 
 
 
 
534c98c
f08a3f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0ed77c
 
f08a3f5
 
 
 
7fedab7
c0ed77c
 
 
 
 
f08a3f5
 
 
 
 
3a5956d
f08a3f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0ed77c
 
f08a3f5
 
 
 
 
c0ed77c
f08a3f5
 
 
 
 
92f4908
f08a3f5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# app.py

import gradio as gr
from extract_text_from_pdf import PDFTextExtractor
from generate_transcript import TranscriptProcessor
from generate_audio import TTSGenerator
import pickle
import os
import tempfile
import shutil
import spaces



def create_temp_session_directory():
    """Create a new, uniquely named temporary directory and return its path.

    The directory is not removed automatically; whoever calls this owns
    its lifetime.
    """
    session_path = tempfile.mkdtemp()
    return session_path

# Combined function to perform all steps sequentially
@spaces.GPU
def process_pdf_to_podcast(pdf_file):
    """Run steps 1-3: extract text, generate a transcript, rewrite it for TTS.

    All intermediate files are written into a freshly created temporary
    session directory, whose path is returned so the audio step can reuse it.

    Args:
        pdf_file: Source path of the uploaded PDF (it is handed to
            ``shutil.copy`` as the file to copy).

    Returns:
        A 5-tuple ``(status, text_preview, transcript_preview,
        tts_ready_preview, session_dir)`` where ``text_preview`` is the first
        500 characters of the cleaned text, the two transcript previews are
        the objects unpickled from the files produced by the transcript
        processor, and ``session_dir`` is the session directory path.

    Raises:
        ValueError: If no PDF was supplied.
    """
    # Fail early with a readable message instead of an opaque TypeError
    # from shutil.copy when the button is clicked without an upload.
    if not pdf_file:
        raise ValueError("No PDF uploaded. Please upload a PDF file first.")

    session_dir = create_temp_session_directory()

    try:
        # Define paths within the session directory
        pdf_path = os.path.join(session_dir, "uploaded_pdf.pdf")
        clean_text_path = os.path.join(session_dir, "clean_text.txt")
        transcript_path = os.path.join(session_dir, "data.pkl")
        tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
        text_model = "llama3-70b-8192"

        # Step 1: Extract Text from PDF
        shutil.copy(pdf_file, pdf_path)

        extractor = PDFTextExtractor(pdf_path, clean_text_path)
        clean_text_path = extractor.clean_and_save_text()

        # Display a preview of extracted text
        with open(clean_text_path, 'r', encoding='utf-8') as file:
            text_preview = file.read(500)

        # Step 2: Generate Transcript
        processor = TranscriptProcessor(
            clean_text_path, transcript_path, tts_ready_path, text_model
        )
        transcript_path = processor.generate_transcript()

        # Load the generated transcript for preview.
        # NOTE: pickle is only used on files this pipeline has just written
        # into its own session directory, never on user-supplied data.
        with open(transcript_path, 'rb') as f:
            transcript_preview = pickle.load(f)

        # Step 3: Rewrite Transcript for TTS
        tts_ready_path = processor.rewrite_transcript()

        # Load the rewritten transcript for preview and editing
        with open(tts_ready_path, 'rb') as f:
            tts_ready_preview = pickle.load(f)
    except Exception:
        # The session directory is useless after a failed run; remove it so
        # repeated failures do not pile up temporary directories.
        shutil.rmtree(session_dir, ignore_errors=True)
        raise

    return (
        "Steps 1-3 completed. Preview and adjust the rewritten transcript if needed.",
        text_preview,
        transcript_preview,
        tts_ready_preview,
        session_dir,
    )


# Final Step: Generate Audio after optional adjustments
@spaces.GPU
def generate_audio_from_modified_text(tts_ready_text, session_dir):
    """Persist the (possibly edited) TTS transcript and synthesize the audio.

    Args:
        tts_ready_text: Transcript content to pickle into the session's
            ``podcast_ready_data.pkl`` before audio generation.
        session_dir: Path of the session directory returned by
            ``process_pdf_to_podcast``.

    Returns:
        A 2-tuple ``(status_message, audio_path)`` where ``audio_path`` is the
        value returned by ``TTSGenerator.generate_audio()``.

    Raises:
        ValueError: If ``session_dir`` is missing or is not an existing
            directory (e.g. steps 1-3 have not been run yet).
    """
    # Without a prior successful run the state is None and os.path.join
    # would fail with an opaque TypeError; report the real cause instead.
    if not session_dir or not os.path.isdir(session_dir):
        raise ValueError(
            "No active session found. Run steps 1-3 before generating audio."
        )

    tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
    audio_output_path = os.path.join(session_dir, "final_podcast_audio.mp3")

    # Save any modified TTS-ready transcript
    with open(tts_ready_path, 'wb') as f:
        pickle.dump(tts_ready_text, f)

    # Generate audio from the TTS-ready transcript
    tts_gen = TTSGenerator(tts_ready_path, audio_output_path)
    audio_path = tts_gen.generate_audio()

    return f"Step 4 complete. Audio saved to {audio_path}.", audio_path


# Gradio Interface
# Components are rendered in the order they are created inside the Blocks
# context, so the statement order below defines the page layout.
with gr.Blocks() as app:
    gr.Markdown("# PDF to Podcast Conversion Application")
    
    # Single-click initiation of Steps 1-3: upload, trigger button and a
    # status box shared by both pipeline stages.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        run_all_button = gr.Button("Run All Steps (1-3)")
        output_status = gr.Textbox(label="Status")
    
    # Step 1 Preview of Extracted Text (read-only)
    extracted_text_preview = gr.Textbox(label="Extracted Text Preview (First 500 Characters)", interactive=False)
    
    # Step 2 Preview of Generated Transcript (read-only)
    transcript_preview = gr.Textbox(label="Generated Transcript Preview", interactive=False)
    
    # Step 3 Editable Rewritten Transcript for TTS; its current content is
    # what gets passed to the audio generation step.
    tts_ready_preview = gr.Textbox(label="Editable Rewritten Transcript for TTS", interactive=True)
    
    # Button for generating audio with editable transcript
    generate_audio_button = gr.Button("Generate Audio from Edited Transcript")
    final_audio_output = gr.Audio(label="Generated Podcast Audio")

    # Carries the session directory path returned by process_pdf_to_podcast
    # over to generate_audio_from_modified_text.
    session_dir = gr.State() 
    
    # Step 1-3 execution: the five outputs match, in order, the 5-tuple
    # returned by process_pdf_to_podcast.
    run_all_button.click(
        process_pdf_to_podcast, 
        inputs=pdf_input, 
        outputs=[output_status, extracted_text_preview, transcript_preview, tts_ready_preview,session_dir]
    )

    # Final step: Generate Audio from modified TTS-ready transcript; the
    # returned (status, audio_path) pair fills the status box and the player.
    generate_audio_button.click(
        generate_audio_from_modified_text, 
        inputs=[tts_ready_preview, session_dir],
        outputs=[output_status, final_audio_output]
    )

# Start the Gradio server for this app.
app.launch()