updated app and Dockerfile
Browse files- Dockerfile +24 -0
- gradio_app.py +112 -25
- requirements.txt +8 -0
    	
        Dockerfile
    ADDED
    
    | @@ -0,0 +1,24 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Container image for the NotebookLM-Kokoro Gradio app.
            FROM python:3.11-slim

            WORKDIR /app

            # Install system dependencies.
            # --no-install-recommends keeps the slim image from pulling in optional
            # packages; the apt lists are removed in the same layer so they do not
            # end up in the image.
            RUN apt-get update && apt-get install -y --no-install-recommends \
                espeak-ng \
                && rm -rf /var/lib/apt/lists/*

            # Copy requirements first so the dependency layer is cached independently
            # of application source changes.
            COPY requirements.txt .
            RUN pip install --no-cache-dir -r requirements.txt

            # Copy application files
            COPY . .

            # Flush Python stdout/stderr immediately so logs show up in `docker logs`.
            ENV PYTHONUNBUFFERED=1

            # Port that gradio_app.py passes to launch(server_port=...).
            EXPOSE 7860

            # Command to run the application
            CMD ["python", "gradio_app.py"]
         | 
    	
        gradio_app.py
    CHANGED
    
    | @@ -8,6 +8,9 @@ import numpy as np | |
| 8 | 
             
            import ast
         | 
| 9 | 
             
            import shutil
         | 
| 10 | 
             
            import warnings
         | 
|  | |
|  | |
|  | |
| 11 | 
             
            warnings.filterwarnings("ignore")
         | 
| 12 |  | 
| 13 | 
             
            # A modified version of generate_audio_from_script to accept voice mapping
         | 
| @@ -68,9 +71,17 @@ def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voic | |
| 68 | 
             
                    return None
         | 
| 69 |  | 
| 70 |  | 
| 71 | 
            -
            def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider):
         | 
| 72 | 
             
                """Process the uploaded PDF file and generate audio"""
         | 
| 73 | 
             
                try:
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 74 | 
             
                    # Check if we received a valid file
         | 
| 75 | 
             
                    if pdf_file is None:
         | 
| 76 | 
             
                        return "No file uploaded", None
         | 
| @@ -112,45 +123,121 @@ def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider): | |
| 112 |  | 
| 113 |  | 
| 114 | 
             
            def create_gradio_app():
         | 
| 115 | 
            -
                 | 
| 116 | 
            -
             | 
| 117 | 
            -
             | 
| 118 | 
            -
             | 
| 119 | 
            -
             | 
| 120 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 121 |  | 
| 122 | 
             
                    with gr.Row():
         | 
| 123 | 
            -
                        with gr.Column():
         | 
| 124 | 
            -
                             | 
| 125 | 
            -
                                 | 
| 126 | 
            -
                                 | 
| 127 | 
            -
                                 | 
| 128 | 
            -
                            )
         | 
| 129 | 
            -
                            speaker2_voice = gr.Dropdown(
         | 
| 130 | 
            -
                                choices=["af_nicole", "af_heart", "bf_emma"],
         | 
| 131 | 
            -
                                value="af_nicole",
         | 
| 132 | 
            -
                                label="Speaker 2 Voice"
         | 
| 133 | 
             
                            )
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 134 | 
             
                            provider = gr.Radio(
         | 
| 135 | 
             
                                choices=["openai", "openrouter"],
         | 
| 136 | 
             
                                value="openrouter",
         | 
| 137 | 
            -
                                label="API Provider | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 138 | 
             
                            )
         | 
| 139 | 
            -
                            submit_btn = gr.Button("Generate Audio")
         | 
| 140 |  | 
| 141 | 
            -
                     | 
| 142 | 
            -
             | 
| 143 | 
            -
                         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 144 |  | 
| 145 | 
             
                    submit_btn.click(
         | 
| 146 | 
             
                        fn=process_pdf,
         | 
| 147 | 
             
                        inputs=[pdf_input, speaker1_voice, speaker2_voice, provider],
         | 
| 148 | 
            -
                        outputs=[status_output, audio_output]
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 149 | 
             
                    )
         | 
| 150 |  | 
| 151 | 
             
                return app
         | 
| 152 |  | 
| 153 | 
            -
             | 
| 154 | 
             
            if __name__ == "__main__":
         | 
| 155 | 
             
                demo = create_gradio_app()
         | 
| 156 | 
            -
                demo. | 
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 8 | 
             
            import ast
         | 
| 9 | 
             
            import shutil
         | 
| 10 | 
             
            import warnings
         | 
| 11 | 
            +
            import os
         | 
| 12 | 
            +
            import gradio as gr
         | 
| 13 | 
            +
            from notebook_lm_kokoro import generate_podcast_script, generate_audio_from_script
         | 
| 14 | 
             
            warnings.filterwarnings("ignore")
         | 
| 15 |  | 
| 16 | 
             
            # A modified version of generate_audio_from_script to accept voice mapping
         | 
|  | |
| 71 | 
             
                    return None
         | 
| 72 |  | 
| 73 |  | 
| 74 | 
            +
            def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider, api_key, openrouter_base=None):
         | 
| 75 | 
             
                """Process the uploaded PDF file and generate audio"""
         | 
| 76 | 
             
                try:
         | 
| 77 | 
            +
                
         | 
| 78 | 
            +
                    # Set API configuration based on provider
         | 
| 79 | 
            +
                    if provider == "openai":
         | 
| 80 | 
            +
                        os.environ["OPENAI_API_KEY"] = api_key
         | 
| 81 | 
            +
                        os.environ["OPENROUTER_API_BASE"] = "https://api.openai.com/v1"
         | 
| 82 | 
            +
                    else:
         | 
| 83 | 
            +
                        os.environ["OPENAI_API_KEY"] = api_key
         | 
| 84 | 
            +
                        os.environ["OPENROUTER_API_BASE"] = openrouter_base or "https://openrouter.ai/api/v1"
         | 
| 85 | 
             
                    # Check if we received a valid file
         | 
| 86 | 
             
                    if pdf_file is None:
         | 
| 87 | 
             
                        return "No file uploaded", None
         | 
|  | |
| 123 |  | 
| 124 |  | 
| 125 | 
             
            def create_gradio_app():
                """Build the Gradio Blocks UI for turning a PDF into two-speaker audio.

                The left column collects the PDF, the two voices, the API provider,
                the API key and (for OpenRouter) an optional base URL; the right
                column shows the status text and the generated audio file.

                Returns:
                    The ``gr.Blocks`` application; the caller is responsible for
                    queueing and launching it.
                """
                # Add CSS for better styling
                css = """
                .gradio-container {max-width: 900px !important}
                """

                with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
                    # NOTE(review): the leading "π" characters in the UI strings below
                    # look like mis-encoded emoji - confirm against the original file.
                    gr.Markdown(
                        """
                        # π NotebookLM-Kokoro TTS App
                        Upload a PDF, choose voices, and generate conversational audio using Kokoro TTS.
                        """
                    )

                    with gr.Row():
                        with gr.Column(scale=2):
                            pdf_input = gr.File(
                                label="Upload PDF Document",
                                file_types=[".pdf"],
                                type="filepath"
                            )

                            with gr.Row():
                                speaker1_voice = gr.Dropdown(
                                    choices=["af_heart", "af_bella", "hf_beta"],
                                    value="af_heart",
                                    label="Speaker 1 Voice"
                                )
                                speaker2_voice = gr.Dropdown(
                                    choices=["af_nicole", "af_heart", "bf_emma"],
                                    value="af_nicole",
                                    label="Speaker 2 Voice"
                                )

                            # A single provider selector. Creating it twice would render
                            # a second, orphaned radio whose value is never read.
                            with gr.Group():
                                provider = gr.Radio(
                                    choices=["openai", "openrouter"],
                                    value="openrouter",
                                    label="API Provider"
                                )

                                api_key = gr.Textbox(
                                    label="API Key",
                                    placeholder="Enter your API key here...",
                                    type="password",
                                    # Gradio attaches CSS classes via elem_classes;
                                    # there is no class_name parameter.
                                    elem_classes=["api-key-input"]
                                )

                                openrouter_base = gr.Textbox(
                                    label="OpenRouter Base URL (optional)",
                                    placeholder="https://openrouter.ai/api/v1",
                                    # Must match the provider's default value, since
                                    # the change handler only fires after a change.
                                    visible=provider.value == "openrouter"
                                )

                                # Show/hide OpenRouter base URL based on provider selection
                                def toggle_openrouter_base(provider_choice):
                                    return gr.update(visible=provider_choice == "openrouter")

                                provider.change(
                                    fn=toggle_openrouter_base,
                                    inputs=[provider],
                                    outputs=[openrouter_base]
                                )

                            submit_btn = gr.Button("ποΈ Generate Audio", variant="primary")

                        with gr.Column(scale=2):
                            status_output = gr.Textbox(
                                label="Status",
                                placeholder="Processing status will appear here..."
                            )
                            audio_output = gr.Audio(
                                label="Generated Audio",
                                type="filepath"
                            )

                    # Examples section
                    # Examples only pre-fill the form. They are not cached, because
                    # caching would call process_pdf at build time without an API key.
                    # NOTE(review): the guard assumes sample.pdf may be absent from the
                    # working directory - confirm whether it ships with the app.
                    sample_pdf = "sample.pdf"
                    if os.path.exists(sample_pdf):
                        gr.Examples(
                            examples=[
                                [sample_pdf, "af_heart", "af_nicole", "openrouter"],
                            ],
                            inputs=[pdf_input, speaker1_voice, speaker2_voice, provider],
                            cache_examples=False,
                        )

                    # process_pdf requires api_key (and optionally openrouter_base), so
                    # both fields must be wired in as inputs.
                    submit_btn.click(
                        fn=process_pdf,
                        inputs=[
                            pdf_input,
                            speaker1_voice,
                            speaker2_voice,
                            provider,
                            api_key,
                            openrouter_base,
                        ],
                        outputs=[status_output, audio_output],
                        api_name="generate"
                    )

                    gr.Markdown(
                        """
                        ### π Notes
                        - Make sure your PDF is readable and contains text (not scanned images)
                        - Processing large PDFs may take a few minutes
                        - Enter a valid OpenAI/OpenRouter API key in the API Key field above
                        """
                    )

                return app
         | 
| 235 |  | 
|  | |
| 236 | 
             
            if __name__ == "__main__":
                # Script entry point: build the UI and serve it on all interfaces so
                # the port is reachable from outside a container.
                demo = create_gradio_app()
                # Gradio 4 removed queue(concurrency_count=...) and raises if it is
                # passed; default_concurrency_limit=1 keeps one job running at a time.
                # NOTE(review): share=True requests a public share link in addition
                # to the local server - confirm this is wanted for a deployment that
                # accepts user API keys.
                demo.queue(default_concurrency_limit=1).launch(
                    server_name="0.0.0.0",
                    server_port=7860,
                    share=True,
                    debug=True
                )
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            kokoro
         | 
| 2 | 
            +
            soundfile
         | 
| 3 | 
            +
            torch
         | 
| 4 | 
            +
            PyPDF2
         | 
| 5 | 
            +
            numpy
         | 
| 6 | 
            +
            openai
         | 
| 7 | 
            +
            ipython
         | 
| 8 | 
            +
            gradio>=4.0.0
         |