"""Gradio front end that turns an uploaded PDF into a two-speaker podcast.

The script text comes from ``notebook_lm_kokoro.generate_podcast_script`` and
the audio is rendered either with Kokoro (``KPipeline``) or with Kyutai TTS.
"""

import os
import tempfile
import shutil
import ast
import functools
import warnings
import multiprocessing
import concurrent.futures
import urllib.request
import pathlib
import sys

import numpy as np
import soundfile as sf

# Diagnostic: where is ~/.cache pointing?
print(f"[DEBUG] HOME = {os.environ.get('HOME')}")
print(f"[DEBUG] XDG_CACHE_HOME = {os.environ.get('XDG_CACHE_HOME')}")
print("[DEBUG] Trying to create /.cache/test.txt")
try:
    os.makedirs("/.cache", exist_ok=True)
    with open("/.cache/test.txt", "w") as f:
        f.write("test")
    print("[DEBUG] Successfully wrote to /.cache")
except Exception as e:
    print(f"[DEBUG] ❌ Failed to write to /.cache: {e}")

# Set cache dirs BEFORE importing torch, transformers, or moshi.
# The model-related imports are deliberately placed below this section so
# that these variables are already in the environment when they are imported.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
os.environ["XDG_CACHE_HOME"] = "/tmp/huggingface"
os.environ["TORCH_HOME"] = "/tmp/torch"
os.environ["MOSHI_CACHE_DIR"] = "/tmp/moshi"

# Explicitly override ~/.cache
os.environ["HOME"] = "/tmp/home"
os.makedirs("/tmp/home", exist_ok=True)

for path in [
    "/tmp/.cache",
    "/tmp/huggingface",
    "/tmp/huggingface/transformers",
    "/tmp/torch",
    "/tmp/moshi",
]:
    os.makedirs(path, exist_ok=True)

if not os.path.exists("/.cache"):
    try:
        os.symlink("/tmp/.cache", "/.cache")
        print("[DEBUG] Symlinked /.cache to /tmp/.cache")
    except Exception as e:
        print(f"[DEBUG] Couldn't symlink /.cache: {e}")

# Model / UI imports come after the cache environment is configured (see above).
try:
    from moshi.models.tts import TTSModel
except ImportError:
    print("Moshi TTSModel not available — install Kyutai’s version via pip.")
    TTSModel = None

from notebook_lm_kokoro import (
    generate_podcast_script,
    generate_audio_from_script,
    generate_audio_kyutai,
    KPipeline,
)

import gradio as gr

warnings.filterwarnings("ignore")

NUM_WORKERS = multiprocessing.cpu_count()


def ensure_gradio_frpc():
    """Make sure the frpc binary exists under ``$GRADIO_TEMP_DIR/frpc``.

    The target directory defaults to ``/tmp/gradio/frpc`` so that nothing has
    to be written below ``/.cache``. The download is best-effort: failures are
    printed, never raised.

    The binary is fetched into a temporary file in the same directory and only
    renamed to its final name once the download has completed, so an aborted
    download can never be mistaken for a valid binary on the next start.
    """
    gradio_temp_dir = os.environ.get("GRADIO_TEMP_DIR", "/tmp/gradio")
    target_dir = os.path.join(gradio_temp_dir, "frpc")
    os.makedirs(target_dir, exist_ok=True)
    frpc_file = os.path.join(target_dir, "frpc_linux_amd64_v0.3")

    if os.path.exists(frpc_file):
        print("[INFO] frpc binary already exists at expected path.")
        return

    print(f"[INFO] Downloading frpc binary to: {frpc_file}")
    url = "https://cdn-media.huggingface.co/frpc-gradio-0.3/frpc_linux_amd64"
    partial_path = None
    try:
        # The timeout keeps a stalled connection from blocking startup forever.
        with urllib.request.urlopen(url, timeout=60) as response:
            with tempfile.NamedTemporaryFile(dir=target_dir, delete=False) as partial:
                partial_path = partial.name
                shutil.copyfileobj(response, partial)
        os.chmod(partial_path, 0o755)  # Make it executable
        os.replace(partial_path, frpc_file)  # atomic within one directory
        partial_path = None
        print("[SUCCESS] frpc binary downloaded and made executable.")
    except Exception as e:
        print(f"[ERROR] Failed to download frpc binary: {e}")
    finally:
        # Remove a leftover partial download, if any.
        if partial_path is not None and os.path.exists(partial_path):
            os.remove(partial_path)


@functools.lru_cache(maxsize=1)
def _get_kokoro_pipeline():
    """Build the Kokoro pipeline once and reuse it for every segment."""
    return KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")


def process_segment(entry_and_voice_map):
    """Synthesize one transcript entry with Kokoro.

    Args:
        entry_and_voice_map: ``(entry, voice_map)`` where ``entry`` is a
            ``(speaker, dialogue)`` pair and ``voice_map`` maps speaker names
            to Kokoro voice ids. Unknown speakers fall back to ``"af_heart"``.

    Returns:
        The concatenated audio for the entry, or ``None`` when the pipeline
        produced no audio chunks.
    """
    entry, voice_map = entry_and_voice_map
    speaker, dialogue = entry
    chosen_voice = voice_map.get(speaker, "af_heart")

    generator = _get_kokoro_pipeline()(dialogue, voice=chosen_voice)
    if not generator:
        return None

    chunks = [audio for _, _, audio in generator]
    # np.concatenate raises on an empty list, so treat "no audio" explicitly.
    if not chunks:
        return None
    return np.concatenate(chunks, axis=0)


def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voice, output_file):
    """Render a transcript to a WAV file with one Kokoro voice per speaker.

    Args:
        script: String holding a Python list literal of ``(speaker, dialogue)``
            entries; it is parsed with ``ast.literal_eval``.
        speaker1_voice: Kokoro voice id used for ``"Speaker 1"``.
        speaker2_voice: Kokoro voice id used for ``"Speaker 2"``.
        output_file: Path of the audio file to write.

    Returns:
        ``output_file`` on success; ``None`` if the transcript yields no audio
        or any step fails (the error is printed).
    """
    print("[DEBUG] Raw transcript string:")
    print(script)
    voice_map = {"Speaker 1": speaker1_voice, "Speaker 2": speaker2_voice}

    try:
        transcript_list = ast.literal_eval(script)
        if not isinstance(transcript_list, list):
            raise ValueError("Transcript is not a list")

        results = []
        for entry in transcript_list:
            audio = process_segment((entry, voice_map))
            if audio is not None:
                results.append(audio)

        if not results:
            return None

        sample_rate = 24000
        # One second of silence between consecutive segments.
        pause = np.zeros(sample_rate, dtype=np.float32)

        # Interleave segments with pauses and concatenate once, instead of
        # re-copying the growing buffer for every segment.
        pieces = [results[0]]
        for seg in results[1:]:
            pieces.append(pause)
            pieces.append(seg)
        final_audio = np.concatenate(pieces, axis=0)

        sf.write(output_file, final_audio, sample_rate)
        return output_file

    except Exception as e:
        print(f"Transcript parse error: {e}")
        return None


def _audio_output_path(pdf_path):
    """Return ``audio_<stem>.wav`` next to ``pdf_path``, whatever its extension case."""
    directory, filename = os.path.split(pdf_path)
    stem, _ = os.path.splitext(filename)
    return os.path.join(directory, f"audio_{stem}.wav")


def process_pdf(pdf_file, speaker1_voice, speaker2_voice, kyutai_voice1, kyutai_voice2,
                provider, openai_key=None, openrouter_key=None, openrouter_base=None,
                tts_engine=None):
    """Generate a podcast script from a PDF and render it to audio.

    Args:
        pdf_file: Uploaded PDF, either a path string or an object exposing the
            path as ``.name``; ``None`` when nothing was uploaded.
        speaker1_voice: Kokoro voice for speaker 1.
        speaker2_voice: Kokoro voice for speaker 2.
        kyutai_voice1: Kyutai voice for speaker 1.
        kyutai_voice2: Kyutai voice for speaker 2.
        provider: ``"openai"``, ``"openrouter"`` or ``"kyutai"``.
        openai_key: API key used for the OpenAI endpoint.
        openrouter_key: API key used for the OpenRouter endpoint.
        openrouter_base: Optional OpenRouter base URL override.
        tts_engine: ``"kyutai"`` selects Kyutai TTS; anything else uses Kokoro.

    Returns:
        ``(status_message, audio_path_or_None)``.
    """
    try:
        if provider == "openai" and not openai_key:
            return "OpenAI API key is required", None
        if provider == "openrouter" and not openrouter_key:
            return "OpenRouter API key is required", None

        # Pick exactly ONE credential set. For "kyutai" OpenRouter wins when
        # its key is present; otherwise the OpenAI key is used instead of
        # being overwritten with an empty string.
        # NOTE(review): these are process-wide environment variables, so
        # concurrent requests with different keys would share them.
        use_openrouter = provider == "openrouter" or (provider == "kyutai" and openrouter_key)
        use_openai = provider == "openai" or (provider == "kyutai" and not openrouter_key)
        if use_openrouter:
            os.environ["OPENAI_API_KEY"] = openrouter_key or ""
            os.environ["OPENROUTER_API_BASE"] = openrouter_base or "https://openrouter.ai/api/v1"
        elif use_openai:
            os.environ["OPENAI_API_KEY"] = openai_key or ""
            os.environ["OPENROUTER_API_BASE"] = "https://api.openai.com/v1"

        if pdf_file is None:
            return "No file uploaded", None

        # Accept both a plain path and an object that carries it as `.name`.
        pdf_path = getattr(pdf_file, "name", pdf_file)

        script_provider = "openrouter" if provider == "kyutai" and openrouter_key else provider
        transcript, _ = generate_podcast_script(pdf_path, provider=script_provider)

        if transcript is None:
            return "Transcript generation failed: got None", None
        if not transcript.strip().startswith("["):
            return f"Malformed transcript:\n{transcript}", None

        audio_path = _audio_output_path(pdf_path)

        if tts_engine == "kyutai":
            result = generate_audio_kyutai(transcript, kyutai_voice1, kyutai_voice2, audio_path)
        else:
            result = generate_audio_from_script_with_voices(
                transcript, speaker1_voice, speaker2_voice, audio_path
            )

        return ("Process complete!", result) if result else ("Error generating audio", None)

    except Exception as e:
        print(f"process_pdf error: {e}")
        return f"Error: {e}", None


def _visibility_flags(provider, tts_engine):
    """Return widget visibility for the given selections.

    Order: Kokoro voice 1, Kokoro voice 2, Kyutai voice 1, Kyutai voice 2,
    OpenAI key, OpenRouter key, OpenRouter base URL.
    """
    show_kokoro = tts_engine == "kokoro"
    show_kyutai = tts_engine == "kyutai"
    return (
        show_kokoro,
        show_kokoro,
        show_kyutai,
        show_kyutai,
        provider in ["openai", "kyutai"],
        provider in ["openrouter", "kyutai"],
        provider == "openrouter",
    )


def update_ui(provider, tts_engine):
    """Return one ``gr.update`` per conditional widget for the current selections."""
    return [gr.update(visible=flag) for flag in _visibility_flags(provider, tts_engine)]


def create_gradio_app():
    """Build and return the Gradio Blocks application (not yet launched)."""
    css = ".gradio-container {max-width: 900px !important}"

    default_provider = "openrouter"
    default_tts_engine = "kokoro"
    # Start with the same visibility update_ui would produce for the defaults,
    # so the form does not show irrelevant fields before the first change.
    (
        show_kokoro1,
        show_kokoro2,
        show_kyutai1,
        show_kyutai2,
        show_openai_key,
        show_openrouter_key,
        show_openrouter_base,
    ) = _visibility_flags(default_provider, default_tts_engine)

    with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
        gr.Markdown("# 🎧 PDF to Podcast — NotebookLM + Kokoro/Kyutai")

        with gr.Row():
            # Integer scales 3:2 keep the original 1.5:1 width ratio.
            with gr.Column(scale=3):
                pdf_input = gr.File(file_types=[".pdf"], type="filepath", label="📄 Upload your PDF")
                provider = gr.Radio(["openai", "openrouter"], value=default_provider, label="🧠 API Provider")
                tts_engine = gr.Radio(["kokoro", "kyutai"], value=default_tts_engine, label="🎤 TTS Engine")

                speaker1_voice = gr.Dropdown(
                    ["af_heart", "af_bella", "hf_beta"],
                    value="af_heart",
                    label="Speaker 1 Voice",
                    visible=show_kokoro1,
                )
                speaker2_voice = gr.Dropdown(
                    ["af_nicole", "af_heart", "bf_emma"],
                    value="bf_emma",
                    label="Speaker 2 Voice",
                    visible=show_kokoro2,
                )

                kyutai_voice1 = gr.Dropdown(
                    [
                        "expresso/ex03-ex01_happy_001_channel1_334s.wav",
                        "expresso/ex03-ex02_narration_001_channel1_674s.wav",
                        "vctk/p226_023_mic1.wav",
                    ],
                    value="expresso/ex03-ex01_happy_001_channel1_334s.wav",
                    label="Kyutai Voice 1",
                    visible=show_kyutai1,
                )
                kyutai_voice2 = gr.Dropdown(
                    [
                        "expresso/ex03-ex01_happy_001_channel1_334s.wav",
                        "expresso/ex03-ex02_narration_001_channel1_674s.wav",
                        "vctk/p225_023_mic1.wav",
                    ],
                    value="expresso/ex03-ex02_narration_001_channel1_674s.wav",
                    label="Kyutai Voice 2",
                    visible=show_kyutai2,
                )

                with gr.Accordion("🔐 API Keys", open=True):
                    openai_key = gr.Textbox(
                        type="password", label="OpenAI Key", show_label=True, visible=show_openai_key
                    )
                    openrouter_key = gr.Textbox(
                        type="password", label="OpenRouter Key", show_label=True, visible=show_openrouter_key
                    )
                    openrouter_base = gr.Textbox(
                        placeholder="https://openrouter.ai/api/v1",
                        label="OpenRouter Base URL",
                        visible=show_openrouter_base,
                    )

                submit_btn = gr.Button("🎙️ Generate Podcast", variant="primary")

            with gr.Column(scale=2):
                status_output = gr.Textbox(label="📝 Status", interactive=False)
                audio_output = gr.Audio(type="filepath", label="🎵 Your Podcast")

        submit_btn.click(
            process_pdf,
            inputs=[
                pdf_input, speaker1_voice, speaker2_voice, kyutai_voice1, kyutai_voice2,
                provider, openai_key, openrouter_key, openrouter_base, tts_engine,
            ],
            outputs=[status_output, audio_output],
        )

        # Both selectors drive the same set of conditional widgets.
        conditional_widgets = [
            speaker1_voice, speaker2_voice, kyutai_voice1, kyutai_voice2,
            openai_key, openrouter_key, openrouter_base,
        ]
        provider.change(update_ui, [provider, tts_engine], conditional_widgets)
        tts_engine.change(update_ui, [provider, tts_engine], conditional_widgets)

        gr.Markdown("""
        **📌 Tips**
        - Pick your API provider and then set appropriate keys.
        - Choose **TTS Engine** (Kokoro/Kyutai) to reveal relevant voice options.
        - Works well with clean, structured PDFs.
        """)

    return app


ensure_gradio_frpc()

if __name__ == "__main__":
    create_gradio_app().queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        debug=True,
        pwa=True,
    )