File size: 4,578 Bytes
f08a3f5
 
 
 
 
bc135da
 
f08a3f5
 
c634737
191aa05
fce33e5
9d002d8
78c4fc5
f08a3f5
c634737
 
 
 
f08a3f5
80a6d9f
f08a3f5
c634737
 
 
 
 
 
 
 
 
3290c15
c634737
f08a3f5
191aa05
f08a3f5
cb49480
f08a3f5
 
 
 
 
 
 
534c98c
f08a3f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0ed77c
 
f08a3f5
 
 
 
bc135da
c0ed77c
bc135da
 
 
 
c0ed77c
 
 
f08a3f5
 
 
 
 
bc135da
9d002d8
 
f08a3f5
9d002d8
 
f08a3f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0ed77c
 
f08a3f5
 
 
 
 
c0ed77c
f08a3f5
 
 
 
 
92f4908
f08a3f5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# app.py

import gradio as gr
from extract_text_from_pdf import PDFTextExtractor
from generate_transcript import TranscriptProcessor
#from generate_audio import TTSGenerator
from generate_audio_edgetts import EdgeTTSGenerator
import pickle
import os
import tempfile
import shutil
import spaces
import asyncio



def create_temp_session_directory():
    """Create a new temporary directory for one session and return its path.

    The directory comes from ``tempfile.mkdtemp`` and is not removed
    automatically; whoever calls this owns its cleanup.
    """
    session_path = tempfile.mkdtemp()
    return session_path

# Combined function to perform all steps sequentially
@spaces.GPU(duration=120)
def process_pdf_to_podcast(pdf_file):
    """Run steps 1-3: extract text, generate a transcript, rewrite it for TTS.

    All intermediate files are written into a fresh temporary session
    directory, whose path is returned so the audio step can reuse it.

    Args:
        pdf_file: Path of the uploaded PDF; it is copied into the session
            directory with ``shutil.copy``.

    Returns:
        A 5-tuple of (status message, first 500 characters of the cleaned
        text, unpickled transcript, unpickled TTS-ready transcript,
        session directory path).

    Raises:
        gr.Error: If no PDF was supplied.
    """
    # Without an upload the handler receives a falsy value; fail with a
    # message the UI can show instead of an opaque TypeError from shutil.
    if not pdf_file:
        raise gr.Error("Please upload a PDF file before running steps 1-3.")

    session_dir = create_temp_session_directory()

    # Define paths within the session directory
    pdf_path = os.path.join(session_dir, "uploaded_pdf.pdf")
    clean_text_path = os.path.join(session_dir, "clean_text.txt")
    transcript_path = os.path.join(session_dir, "data.pkl")
    tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
    text_model = "llama3-70b-8192"

    # Step 1: Extract Text from PDF
    shutil.copy(pdf_file, pdf_path)

    extractor = PDFTextExtractor(pdf_path, clean_text_path)
    clean_text_path = extractor.clean_and_save_text()

    # Only the first 500 characters are read for the preview
    with open(clean_text_path, 'r', encoding='utf-8') as file:
        text_preview = file.read(500)

    # Step 2: Generate Transcript
    processor = TranscriptProcessor(
        clean_text_path, transcript_path, tts_ready_path, text_model
    )
    transcript_path = processor.generate_transcript()

    # Load the generated transcript for preview (file was just written by
    # the processor above, so it is not external pickle data)
    with open(transcript_path, 'rb') as f:
        transcript_preview = pickle.load(f)

    # Step 3: Rewrite Transcript for TTS
    tts_ready_path = processor.rewrite_transcript()

    # Load the rewritten transcript for preview and editing
    with open(tts_ready_path, 'rb') as f:
        tts_ready_preview = pickle.load(f)

    return (
        "Steps 1-3 completed. Preview and adjust the rewritten transcript if needed.",
        text_preview,
        transcript_preview,
        tts_ready_preview,
        session_dir,
    )


# Final Step: Generate Audio after optional adjustments
#@spaces.GPU(duration=300)
def generate_audio_from_modified_text(tts_ready_text, session_dir):
    """Persist the (possibly edited) TTS-ready transcript and synthesize audio.

    Args:
        tts_ready_text: Transcript content to pickle and hand to the TTS
            generator.
        session_dir: Directory holding this session's files. When falsy, a
            new temporary session directory is created.

    Returns:
        A 2-tuple of (status message, value returned by
        ``EdgeTTSGenerator.generate_audio``, used as the audio output).
    """
    if not session_dir:
        session_dir = create_temp_session_directory()

    tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
    audio_output_path = os.path.join(session_dir, "final_podcast_audio.mp3")

    # Save any modified TTS-ready transcript
    with open(tts_ready_path, 'wb') as f:
        pickle.dump(tts_ready_text, f)

    # Generate audio from the TTS-ready transcript. generate_audio() is a
    # coroutine, so it is driven to completion with asyncio.run. The call
    # must go through the instance created here (the original referenced an
    # undefined name `generator`, which raised NameError).
    tts_gen = EdgeTTSGenerator(tts_ready_path, audio_output_path)
    audio_path = asyncio.run(tts_gen.generate_audio())
    return f"Step 4 complete. Audio saved to {audio_path}.", audio_path


# Gradio Interface: layout, event wiring and launch
with gr.Blocks() as app:
    gr.Markdown("# PDF to Podcast Conversion Application")

    # Top row: upload, trigger for steps 1-3, and a shared status box
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        run_all_button = gr.Button("Run All Steps (1-3)")
        output_status = gr.Textbox(label="Status")

    # Read-only preview of the text extracted in step 1
    extracted_text_preview = gr.Textbox(
        label="Extracted Text Preview (First 500 Characters)",
        interactive=False,
    )

    # Read-only preview of the transcript generated in step 2
    transcript_preview = gr.Textbox(
        label="Generated Transcript Preview",
        interactive=False,
    )

    # Step 3 output; editable so it can be adjusted before audio generation
    tts_ready_preview = gr.Textbox(
        label="Editable Rewritten Transcript for TTS",
        interactive=True,
    )

    # Trigger and player for the final audio step
    generate_audio_button = gr.Button("Generate Audio from Edited Transcript")
    final_audio_output = gr.Audio(label="Generated Podcast Audio")

    # Carries the session directory path from steps 1-3 to the audio step
    session_dir = gr.State()

    # Steps 1-3: fill the status box, the three previews and the session state
    run_all_button.click(
        fn=process_pdf_to_podcast,
        inputs=pdf_input,
        outputs=[
            output_status,
            extracted_text_preview,
            transcript_preview,
            tts_ready_preview,
            session_dir,
        ],
    )

    # Final step: synthesize audio from the edited transcript
    generate_audio_button.click(
        fn=generate_audio_from_modified_text,
        inputs=[tts_ready_preview, session_dir],
        outputs=[output_status, final_audio_output],
    )

app.launch()