|
|
|
import os |
|
import tempfile |
|
import gradio as gr |
|
from notebook_lm_kokoro import generate_podcast_script, KPipeline |
|
import soundfile as sf |
|
import numpy as np |
|
import ast |
|
import shutil |
|
import warnings |
|
import os |
|
import gradio as gr |
|
from notebook_lm_kokoro import generate_podcast_script, generate_audio_from_script |
|
warnings.filterwarnings("ignore") |
|
|
|
|
|
def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voice, output_file):
    """Render a two-speaker transcript into a single WAV file.

    Parameters
    ----------
    script : str
        String representation of a Python list of ``(speaker, dialogue)``
        tuples, e.g. ``[("Speaker 1", "Hi"), ("Speaker 2", "Hello")]``.
    speaker1_voice, speaker2_voice : str
        Kokoro voice ids mapped to "Speaker 1" / "Speaker 2" respectively.
    output_file : str
        Path of the WAV file to write.

    Returns
    -------
    str | None
        ``output_file`` on success; ``None`` on any failure (malformed
        transcript, no audio produced, or a synthesis error).
    """
    voice_map = {"Speaker 1": speaker1_voice, "Speaker 2": speaker2_voice}

    script = script.strip()
    # Cheap sanity check before attempting a full parse.
    if not script.startswith("[") or not script.endswith("]"):
        print("Invalid transcript format. Expected a list of tuples.")
        return None

    try:
        transcript_list = ast.literal_eval(script)
        if not isinstance(transcript_list, list):
            raise ValueError("Transcript is not a list")

        # BUG FIX: the pipeline was previously re-created for every transcript
        # entry, which is wasteful — build it once and reuse it for all turns.
        pipeline = KPipeline(lang_code="a")

        all_audio_segments = []
        for i, entry in enumerate(transcript_list):
            if not isinstance(entry, tuple) or len(entry) != 2:
                print(f"Skipping invalid entry {i}: {entry}")
                continue

            speaker, dialogue = entry
            # Unknown speaker labels fall back to a default voice.
            chosen_voice = voice_map.get(speaker, "af_heart")
            print(f"Generating audio for {speaker} with voice '{chosen_voice}'...")

            generator = pipeline(dialogue, voice=chosen_voice)
            # The pipeline yields (graphemes, phonemes, audio) chunks; keep audio.
            segment_audio = [audio for _gs, _ps, audio in generator]
            if segment_audio:
                all_audio_segments.append(np.concatenate(segment_audio, axis=0))

        if not all_audio_segments:
            print("No audio segments were generated.")
            return None

        # Kokoro emits 24 kHz audio; insert a one-second silence between turns.
        sample_rate = 24000
        pause = np.zeros(sample_rate, dtype=np.float32)
        final_audio = all_audio_segments[0]
        for seg in all_audio_segments[1:]:
            final_audio = np.concatenate((final_audio, pause, seg), axis=0)

        sf.write(output_file, final_audio, sample_rate)
        print(f"Saved final audio as {output_file}")
        return output_file

    except Exception as e:
        print(f"Error processing transcript: {e}")
        return None
|
|
|
|
|
def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider, api_key, openrouter_base=None):
    """Process the uploaded PDF file and generate podcast audio.

    Parameters
    ----------
    pdf_file : str | object | None
        The uploaded PDF — either a filepath string (``gr.File(type="filepath")``)
        or a file wrapper exposing ``.name``.
    speaker1_voice, speaker2_voice : str
        Kokoro voice ids for the two speakers.
    provider : str
        ``"openai"`` or ``"openrouter"``; selects the API base URL.
    api_key : str
        Credential exported to the environment for the script generator.
    openrouter_base : str, optional
        Override for the OpenRouter base URL.

    Returns
    -------
    tuple[str, str | None]
        ``(status_message, audio_path)``; ``audio_path`` is ``None`` on failure.
    """
    try:
        # BUG FIX: guard clause first — a missing upload should not mutate
        # the process environment.
        if pdf_file is None:
            return "No file uploaded", None

        # The downstream generator reads credentials from the environment.
        # BUG FIX: assigning None to os.environ raises TypeError, so only
        # export the key when one was actually provided.
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key
        if provider == "openai":
            os.environ["OPENROUTER_API_BASE"] = "https://api.openai.com/v1"
        else:
            os.environ["OPENROUTER_API_BASE"] = openrouter_base or "https://openrouter.ai/api/v1"

        # BUG FIX: with gr.File(type="filepath") Gradio passes a plain string,
        # which has no ``.name`` attribute; accept both string and wrapper.
        src_path = getattr(pdf_file, "name", pdf_file)

        # Copy the upload to a stable temp location the pipeline can own.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            shutil.copy2(src_path, tmp.name)
            tmp_path = tmp.name

        print(f"Uploaded PDF saved at {tmp_path}")

        transcript, transcript_path = generate_podcast_script(tmp_path, provider=provider)
        if transcript is None:
            return "Error generating transcript", None

        # Write the audio next to the temp PDF, swapping the extension.
        audio_output_path = os.path.join(
            os.path.dirname(tmp_path),
            f"audio_{os.path.basename(tmp_path).replace('.pdf', '.wav')}",
        )

        result = generate_audio_from_script_with_voices(
            transcript,
            speaker1_voice,
            speaker2_voice,
            output_file=audio_output_path,
        )
        if result is None:
            return "Error generating audio", None

        return "Process complete!", result

    except Exception as e:
        print(f"Error in process_pdf: {str(e)}")
        return f"Error processing file: {str(e)}", None
|
|
|
|
|
def create_gradio_app():
    """Build and return the Gradio Blocks UI for the PDF-to-podcast app."""
    css = """
    .gradio-container {max-width: 900px !important}
    """

    with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
        gr.Markdown(
            """
            # π NotebookLM-Kokoro TTS App
            Upload a PDF, choose voices, and generate conversational audio using Kokoro TTS.
            """
        )

        with gr.Row():
            with gr.Column(scale=2):
                pdf_input = gr.File(
                    label="Upload PDF Document",
                    file_types=[".pdf"],
                    type="filepath"
                )

                with gr.Row():
                    speaker1_voice = gr.Dropdown(
                        choices=["af_heart", "af_bella", "hf_beta"],
                        value="af_heart",
                        label="Speaker 1 Voice"
                    )
                    speaker2_voice = gr.Dropdown(
                        choices=["af_nicole", "af_heart", "bf_emma"],
                        value="af_nicole",
                        label="Speaker 2 Voice"
                    )

                # BUG FIX: the provider radio was created twice; the first
                # instance was orphaned (its variable immediately rebound) and
                # showed up as a dead duplicate in the UI. Keep a single one.
                with gr.Group():
                    provider = gr.Radio(
                        choices=["openai", "openrouter"],
                        value="openrouter",
                        label="API Provider"
                    )

                    # BUG FIX: gr.Textbox has no `class_name` kwarg (it raised
                    # TypeError at app construction); the CSS hook is
                    # `elem_classes`.
                    api_key = gr.Textbox(
                        label="API Key",
                        placeholder="Enter your API key here...",
                        type="password",
                        elem_classes=["api-key-input"]
                    )

                    openrouter_base = gr.Textbox(
                        label="OpenRouter Base URL (optional)",
                        placeholder="https://openrouter.ai/api/v1",
                        visible=False
                    )

                def toggle_openrouter_base(provider_choice):
                    # Only show the base-URL box when OpenRouter is selected.
                    return gr.update(visible=provider_choice == "openrouter")

                provider.change(
                    fn=toggle_openrouter_base,
                    inputs=[provider],
                    outputs=[openrouter_base]
                )

                submit_btn = gr.Button("ποΈ Generate Audio", variant="primary")

            with gr.Column(scale=2):
                status_output = gr.Textbox(
                    label="Status",
                    placeholder="Processing status will appear here..."
                )
                audio_output = gr.Audio(
                    label="Generated Audio",
                    type="filepath"
                )

        # BUG FIX: examples previously passed fn=process_pdf with too few
        # inputs (api_key missing) and cache_examples=True, which would run
        # the whole pipeline at startup. Examples now only pre-fill inputs.
        gr.Examples(
            examples=[
                ["sample.pdf", "af_heart", "af_nicole", "openrouter"],
            ],
            inputs=[pdf_input, speaker1_voice, speaker2_voice, provider],
        )

        # BUG FIX: api_key and openrouter_base were missing from the click
        # inputs, so process_pdf was invoked with too few positional args.
        submit_btn.click(
            fn=process_pdf,
            inputs=[pdf_input, speaker1_voice, speaker2_voice, provider, api_key, openrouter_base],
            outputs=[status_output, audio_output],
            api_name="generate"
        )

        gr.Markdown(
            """
            ### π Notes
            - Make sure your PDF is readable and contains text (not scanned images)
            - Processing large PDFs may take a few minutes
            - You need a valid OpenAI/OpenRouter API key set as environment variable
            """
        )

    return app
|
|
|
if __name__ == "__main__":
    # Script entry point: build the UI and serve it.
    demo = create_gradio_app()
    # NOTE(review): `queue(concurrency_count=...)` exists in Gradio 3.x but was
    # removed in 4.x (replaced by `default_concurrency_limit`) — confirm the
    # pinned Gradio version before upgrading.
    # server_name="0.0.0.0" binds all interfaces; share=True also requests a
    # public *.gradio.live tunnel URL.
    demo.queue(concurrency_count=1).launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        debug=True
    )