Spaces:

dwarkesh
/

producer

Running

App Files Files Community

dwarkesh committed on Apr 17

Commit

17841e1

verified ·

1 Parent(s): fb21a11

Update app.py

Browse files

Files changed (1) hide show

app.py +177 -72

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import gradio as gr
 import asyncio
 from pathlib import Path
 from google import genai
-from google.genai import types
 import os
 from dataclasses import dataclass
 from typing import Dict
@@ -17,9 +17,11 @@ class ContentRequest:
     prompt_key: str
 class ContentGenerator:
-    def __init__(self,api_key):
         self.current_prompts = self._load_default_prompts()
-        self.client = genai.Client(api_key=api_key)
     def _load_default_prompts(self) -> Dict[str, str]:
         """Load default prompts and examples from files and CSVs."""
@@ -62,42 +64,63 @@ class ContentGenerator:
         # Load base prompts and inject examples
         prompts = {}
         for key in ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]:
-            prompt = Path(f"prompts/{key}.txt").read_text()
-            # Inject relevant examples
-            if key == "timestamps":
-                prompt = prompt.replace("{timestamps_examples}", timestamp_examples)
-            elif key == "titles_and_thumbnails":
-                prompt = prompt.replace("{title_examples}", title_examples)
-            elif key == "description":
-                prompt = prompt.replace("{description_examples}", description_examples)
-            elif key == "clips":
-                prompt = prompt.replace("{clip_examples}", clip_examples)
-            prompts[key] = prompt
         return prompts
     async def generate_content(self, request: ContentRequest, transcript: str) -> str:
         """Generate content using Gemini asynchronously."""
         try:
             print(f"\nFull prompt for {request.prompt_key}:")
             print("=== SYSTEM PROMPT ===")
-            print(self.current_prompts[request.prompt_key])
             print("=== END SYSTEM PROMPT ===\n")
             response = self.client.models.generate_content(
                 model="gemini-2.5-pro-exp-03-25",
-                config=types.GenerateContentConfig(system_instruction=self.current_prompts[request.prompt_key]),
                 contents=transcript
             )
-            if response and hasattr(response, 'candidates'):
                 return response.text
             else:
-                return f"Error: Unexpected response structure for {request.prompt_key}"
         except Exception as e:
             return f"Error generating content: {str(e)}"
 def extract_video_id(url: str) -> str:
@@ -118,7 +141,8 @@ def get_transcript(video_id: str) -> str:
 class TranscriptProcessor:
     def __init__(self):
-        self.generator = ContentGenerator(api_key=os.getenv("GOOGLE_API_KEY"))
     def _get_youtube_transcript(self, url: str) -> str:
@@ -130,20 +154,53 @@ class TranscriptProcessor:
         except Exception as e:
             raise Exception(f"Error fetching YouTube transcript: {str(e)}")
-    async def process_transcript(self, audio_file):
         """Process input and generate all content."""
-        audio_path = audio_file.name
         try:
-            aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
             config = aai.TranscriptionConfig(speaker_labels=True, language_code="en")
-            transcript_iter = aai.Transcriber().transcribe(str(audio_path), config=config)
             transcript = transcript_iter.text
             # Process each type sequentially
             sections = {}
-            for key in ["titles_and_thumbnails", "description", "previews", "clips", "timestamps"]:
-                result = await self.generator.generate_content(ContentRequest(key), transcript)
-                sections[key] = result
             # Combine into markdown with H2 headers
             markdown = f"""
@@ -170,16 +227,18 @@ class TranscriptProcessor:
             return markdown
         except Exception as e:
             return f"Error processing input: {str(e)}"
     def update_prompts(self, *values) -> str:
         """Update the current session's prompts."""
-        self.generator.current_prompts.update(zip(
-            ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"],
-            values
-        ))
-        return "Prompts updated for this session!"
 def create_interface():
@@ -190,13 +249,29 @@ def create_interface():
         gr.Markdown(
             """
             # Gemini Podcast Content Generator
-            Generate preview clips, timestamps, descriptions and more from an audio file using Gemini.
-            Simply upload an audio file to get started and Gemini handles the rest.
             """
         )
-        with gr.Tab("Generate Content"):
             input_audio = gr.File(
                 label="Upload Audio File",
                 file_count="single",
@@ -204,31 +279,57 @@ def create_interface():
             )
             submit_btn = gr.Button("Generate Content with Gemini")
-            output = gr.Markdown()  # Single markdown output
-            async def process_wrapper(text):
                 print("Process wrapper started")
-                print(f"Input text: {text[:100]}...")
                 try:
-                    result = await processor.process_transcript(text)
-                    print("Process completed, got results")
-                    return result
                 except Exception as e:
-                    print(f"Error in process_wrapper: {str(e)}")
-                    return f"# Error\n\n{str(e)}"
             submit_btn.click(
                 fn=process_wrapper,
-                inputs=input_audio,
                 outputs=output,
-                queue=True
             )
         with gr.Tab("Customize Prompts"):
             gr.Markdown(
                 """
-                ## Customize Generation Prompts
                 Here you can experiment with different prompts during your session.
                 Changes will remain active until you reload the page.
@@ -236,41 +337,45 @@ def create_interface():
                 """
             )
             prompt_inputs = [
                 gr.Textbox(
                     label=f"{key.replace('_', ' ').title()} Prompt",
                     lines=10,
-                    value=processor.generator.current_prompts[key]
                 )
-                for key in [
-                    "previews",
-                    "clips",
-                    "description",
-                    "timestamps",
-                    "titles_and_thumbnails"
-                ]
             ]
             status = gr.Textbox(label="Status", interactive=False)
-            # Update prompts when they change
-            for prompt in prompt_inputs:
-                prompt.change(
-                    fn=processor.update_prompts,
-                    inputs=prompt_inputs,
-                    outputs=[status]
-                )
-            # Reset button
-            reset_btn = gr.Button("Reset to Default Prompts")
             reset_btn.click(
-                fn=lambda: (
-                    processor.update_prompts(*processor.generator.current_prompts.values()),
-                    *processor.generator.current_prompts.values(),
-                ),
-                outputs=[status] + prompt_inputs,
             )
     return app
 if __name__ == "__main__":
-    create_interface().launch()

 import asyncio
 from pathlib import Path
 from google import genai
+from google.genai import types # Import types for error handling
 import os
 from dataclasses import dataclass
 from typing import Dict
     prompt_key: str
 class ContentGenerator:
+    # Modified __init__ slightly - allow api_key=None initially
+    def __init__(self, api_key=None):
         self.current_prompts = self._load_default_prompts()
+        # Initialize client only if key is provided, otherwise set to None
+        self.client = genai.Client(api_key=api_key) if api_key else None
     def _load_default_prompts(self) -> Dict[str, str]:
         """Load default prompts and examples from files and CSVs."""
         # Load base prompts and inject examples
         prompts = {}
         for key in ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]:
+            try: # Add try-except for file reading
+                prompt = Path(f"prompts/{key}.txt").read_text()
+                # Inject relevant examples
+                if key == "timestamps":
+                    prompt = prompt.replace("{timestamps_examples}", timestamp_examples)
+                elif key == "titles_and_thumbnails":
+                    prompt = prompt.replace("{title_examples}", title_examples)
+                elif key == "description":
+                    prompt = prompt.replace("{description_examples}", description_examples)
+                elif key == "clips":
+                    prompt = prompt.replace("{clip_examples}", clip_examples)
+                prompts[key] = prompt
+            except FileNotFoundError:
+                 print(f"Warning: Prompt file prompts/{key}.txt not found. Using empty prompt.")
+                 prompts[key] = "" # Use empty prompt if file is missing
+            except Exception as e:
+                 print(f"Warning: Error loading prompt file prompts/{key}.txt: {e}")
+                 prompts[key] = ""
         return prompts
     async def generate_content(self, request: ContentRequest, transcript: str) -> str:
         """Generate content using Gemini asynchronously."""
+        # Check if client is initialized
+        if not self.client:
+             return f"Error: Google AI Client not initialized. Please provide an API key."
         try:
             print(f"\nFull prompt for {request.prompt_key}:")
             print("=== SYSTEM PROMPT ===")
+            # Ensure prompt exists
+            system_prompt = self.current_prompts.get(request.prompt_key, "")
+            if not system_prompt:
+                 print(f"Warning: Empty system prompt for {request.prompt_key}")
+            print(system_prompt)
             print("=== END SYSTEM PROMPT ===\n")
             response = self.client.models.generate_content(
                 model="gemini-2.5-pro-exp-03-25",
+                config=types.GenerateContentConfig(system_instruction=system_prompt),
                 contents=transcript
             )
+            if response and hasattr(response, 'text'): # Simpler check for Gemini API response
                 return response.text
             else:
+                # Try to get more details if possible
+                error_details = getattr(response, 'prompt_feedback', 'Unknown reason')
+                print(f"Unexpected Gemini response structure for {request.prompt_key}. Response: {response}")
+                return f"Error: Unexpected response structure for {request.prompt_key}. Details: {error_details}"
+        except types.PermissionDeniedError as e:
+             print(f"Permission Denied Error generating content for {request.prompt_key}: {e}")
+             return f"Error generating content: Permission Denied. Please check your Google API Key. Details: {str(e)}"
         except Exception as e:
+            print(f"Error generating content for {request.prompt_key}: {e}")
             return f"Error generating content: {str(e)}"
 def extract_video_id(url: str) -> str:
 class TranscriptProcessor:
     def __init__(self):
+        """Create the content generator with no Google API key yet."""
+        # Initialize generator without API key initially
+        self.generator = ContentGenerator(api_key=None) # No key needed at init
     def _get_youtube_transcript(self, url: str) -> str:
         except Exception as e:
             raise Exception(f"Error fetching YouTube transcript: {str(e)}")
+    # Modify process_transcript to accept the AssemblyAI key
+    async def process_transcript(self, audio_file, assemblyai_api_key: str):
         """Process input and generate all content."""
+        if not audio_file:
+            raise ValueError("No audio file provided.")
+        if not assemblyai_api_key:
+            raise ValueError("AssemblyAI API Key is required.")
+        audio_path = Path(audio_file.name) # Use Path object
+        if not audio_path.exists():
+            raise FileNotFoundError(f"Audio file not found at path: {audio_path}")
         try:
+            # Set AssemblyAI key just before use
+            aai.settings.api_key = assemblyai_api_key
+            print(f"Transcribing file: {audio_path}")
             config = aai.TranscriptionConfig(speaker_labels=True, language_code="en")
+            transcriber = aai.Transcriber()
+            transcript_iter = transcriber.transcribe(str(audio_path), config=config) # Ensure path is string
+            if transcript_iter.error:
+                 raise Exception(f"AssemblyAI Transcription Error: {transcript_iter.error}")
+            if not transcript_iter.text:
+                 return "Error: Transcription resulted in empty text."
             transcript = transcript_iter.text
+            print("Transcription successful.")
             # Process each type sequentially
             sections = {}
+            tasks = []
+            keys = ["titles_and_thumbnails", "description", "previews", "clips", "timestamps"]
+            print("Starting content generation tasks...")
+            # Create concurrent tasks for Gemini generation
+            for key in keys:
+                 tasks.append(asyncio.create_task(
+                      self.generator.generate_content(ContentRequest(key), transcript)
+                 ))
+            # Wait for all tasks to complete
+            results = await asyncio.gather(*tasks)
+            print("Content generation tasks completed.")
+            # Assign results back to sections
+            for i, key in enumerate(keys):
+                 sections[key] = results[i]
             # Combine into markdown with H2 headers
             markdown = f"""
             return markdown
         except Exception as e:
+            # Log the full traceback for debugging
+            import traceback
+            print(f"Error during transcript processing: {traceback.format_exc()}")
             return f"Error processing input: {str(e)}"
     def update_prompts(self, *values) -> str:
         """Update the current session's prompts."""
+        keys = ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]
+        self.generator.current_prompts.update(zip(keys, values))
+        # Check if all keys were updated correctly
+        updated_keys_str = ", ".join(k for k, v in zip(keys, values) if v is not None)
+        return f"Prompts updated for this session: {updated_keys_str}"
 def create_interface():
         gr.Markdown(
             """
             # Gemini Podcast Content Generator
+            Generate preview clips, timestamps, descriptions and more from podcast transcripts using Gemini.
+            **Important:** Enter your API keys below before uploading your audio file.
             """
         )
+        with gr.Tab("Generate Content with Gemini"):
+            # --- ADDED API KEY INPUTS ---
+            google_api_key_input = gr.Textbox(
+                label="Google API Key",
+                placeholder="Enter your Google AI Studio API Key here (e.g., AIza...)",
+                type="password",
+                info="Your GCP account needs to have billing enabled to use the 2.5 pro model.",
+                # value=os.getenv("GOOGLE_API_KEY", "") # Optionally preload from env if available
+            )
+            assemblyai_api_key_input = gr.Textbox(
+                label="AssemblyAI API Key",
+                placeholder="Enter your AssemblyAI API Key here",
+                type="password",
+                # value=os.getenv("ASSEMBLYAI_API_KEY", "") # Optionally preload from env if available
+            )
+            # --- END OF ADDED INPUTS ---
             input_audio = gr.File(
                 label="Upload Audio File",
                 file_count="single",
             )
             submit_btn = gr.Button("Generate Content with Gemini")
+            output = gr.Markdown(label="Generated Content") # Added label
+            # Modify the wrapper function signature to accept API keys
+            async def process_wrapper(google_key, assemblyai_key, audio_file_obj):
                 print("Process wrapper started")
+                # 1. Validate inputs
+                print(f"Received Google Key: {'*' * (len(google_key) - 4) + google_key[-4:] if len(google_key) > 4 else '****'}")
+                print(f"Received AssemblyAI Key: {'*' * (len(assemblyai_key) - 4) + assemblyai_key[-4:] if len(assemblyai_key) > 4 else '****'}")
+                print(f"Audio file object received: Name='{getattr(audio_file_obj, 'name', 'N/A')}'")
+                # Show processing message
+                yield gr.update(value="Processing... Setting up clients and starting transcription...")
                 try:
+                    # 2. Re-initialize/Update Google client with the provided key *before* processing
+                    # This assumes processor.generator exists and is the correct instance
+                    print("Initializing Google Client...")
+                    processor.generator.client = genai.Client(api_key=google_key)
+                    print("Google client initialized.")
+                    # 3. Call process_transcript, passing the AssemblyAI key and audio object
+                    yield gr.update(value="Processing... Transcribing audio with AssemblyAI...")
+                    result = await processor.process_transcript(audio_file_obj, assemblyai_key)
+                    print("Process completed, returning results.")
+                    yield gr.update(value=result) # Final update with the result
+                except types.PermissionDeniedError as e:
+                     error_msg = f"# Error\n\nPermission Denied: Please check your Google API Key. Details: {str(e)}"
+                     print(error_msg)
+                     yield gr.update(value=error_msg)
                 except Exception as e:
+                    # Log the full traceback for debugging
+                    import traceback
+                    print(f"Error in process_wrapper: {traceback.format_exc()}")
+                    error_msg = f"# Error\n\nAn unexpected error occurred: {str(e)}"
+                    yield gr.update(value=error_msg) # Update output with error
+            # Modify the submit_btn.click inputs to include the API key textboxes
             submit_btn.click(
                 fn=process_wrapper,
+                # Order matters: matches the function signature (google_key, assemblyai_key, audio_file_obj)
+                inputs=[google_api_key_input, assemblyai_api_key_input, input_audio],
                 outputs=output,
+                # queue argument omitted so the default applies; note that generator (yield) handlers need Gradio's queue enabled, not disabled
             )
         with gr.Tab("Customize Prompts"):
             gr.Markdown(
                 """
+                ## Customize Generation Prompts for Gemini
                 Here you can experiment with different prompts during your session.
                 Changes will remain active until you reload the page.
                 """
             )
+            # Use the keys defined earlier for consistency
+            prompt_keys = ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]
             prompt_inputs = [
                 gr.Textbox(
                     label=f"{key.replace('_', ' ').title()} Prompt",
                     lines=10,
+                    value=processor.generator.current_prompts.get(key, "") # Use .get for safety
                 )
+                for key in prompt_keys
             ]
             status = gr.Textbox(label="Status", interactive=False)
+            # --- Simplified Update Logic ---
+            update_btn = gr.Button("Update Session Prompts")
+            update_btn.click(
+                 fn=processor.update_prompts,
+                 inputs=prompt_inputs,
+                 outputs=[status]
+            )
+            # --- End Simplified Update Logic ---
+            # Reset button - fetches defaults again
+            reset_btn = gr.Button("Reset to Default Gemini Prompts")
+            # Define a helper function for reset to avoid complex lambda
+            def reset_prompts_ui():
+                 default_prompts_dict = processor.generator._load_default_prompts()
+                 processor.generator.current_prompts = default_prompts_dict # Update internal state
+                 # Return values in the correct order for outputs
+                 return [ "Prompts reset to defaults!" ] + [ default_prompts_dict.get(key, "") for key in prompt_keys ]
             reset_btn.click(
+                fn=reset_prompts_ui,
+                inputs=None, # No inputs needed
+                outputs=[status] + prompt_inputs # Update status and all textboxes
             )
     return app
 if __name__ == "__main__":
+    app = create_interface()
+    app.launch()