dwarkesh committed on
Commit
17841e1
·
verified ·
1 Parent(s): fb21a11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -72
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import asyncio
3
  from pathlib import Path
4
  from google import genai
5
- from google.genai import types
6
  import os
7
  from dataclasses import dataclass
8
  from typing import Dict
@@ -17,9 +17,11 @@ class ContentRequest:
17
  prompt_key: str
18
 
19
  class ContentGenerator:
20
- def __init__(self,api_key):
 
21
  self.current_prompts = self._load_default_prompts()
22
- self.client = genai.Client(api_key=api_key)
 
23
 
24
  def _load_default_prompts(self) -> Dict[str, str]:
25
  """Load default prompts and examples from files and CSVs."""
@@ -62,42 +64,63 @@ class ContentGenerator:
62
  # Load base prompts and inject examples
63
  prompts = {}
64
  for key in ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]:
65
- prompt = Path(f"prompts/{key}.txt").read_text()
66
-
67
- # Inject relevant examples
68
- if key == "timestamps":
69
- prompt = prompt.replace("{timestamps_examples}", timestamp_examples)
70
- elif key == "titles_and_thumbnails":
71
- prompt = prompt.replace("{title_examples}", title_examples)
72
- elif key == "description":
73
- prompt = prompt.replace("{description_examples}", description_examples)
74
- elif key == "clips":
75
- prompt = prompt.replace("{clip_examples}", clip_examples)
76
-
77
- prompts[key] = prompt
 
 
 
 
 
 
 
78
 
79
  return prompts
80
 
81
  async def generate_content(self, request: ContentRequest, transcript: str) -> str:
82
  """Generate content using Gemini asynchronously."""
 
 
 
83
  try:
84
  print(f"\nFull prompt for {request.prompt_key}:")
85
  print("=== SYSTEM PROMPT ===")
86
- print(self.current_prompts[request.prompt_key])
 
 
 
 
87
  print("=== END SYSTEM PROMPT ===\n")
88
 
89
  response = self.client.models.generate_content(
90
  model="gemini-2.5-pro-exp-03-25",
91
- config=types.GenerateContentConfig(system_instruction=self.current_prompts[request.prompt_key]),
92
  contents=transcript
93
  )
94
 
95
- if response and hasattr(response, 'candidates'):
96
  return response.text
97
  else:
98
- return f"Error: Unexpected response structure for {request.prompt_key}"
99
-
 
 
 
 
 
 
100
  except Exception as e:
 
101
  return f"Error generating content: {str(e)}"
102
 
103
  def extract_video_id(url: str) -> str:
@@ -118,7 +141,8 @@ def get_transcript(video_id: str) -> str:
118
 
119
  class TranscriptProcessor:
120
  def __init__(self):
121
- self.generator = ContentGenerator(api_key=os.getenv("GOOGLE_API_KEY"))
 
122
 
123
 
124
  def _get_youtube_transcript(self, url: str) -> str:
@@ -130,20 +154,53 @@ class TranscriptProcessor:
130
  except Exception as e:
131
  raise Exception(f"Error fetching YouTube transcript: {str(e)}")
132
 
133
- async def process_transcript(self, audio_file):
 
134
  """Process input and generate all content."""
135
- audio_path = audio_file.name
 
 
 
 
 
 
 
 
136
  try:
137
- aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
 
 
138
  config = aai.TranscriptionConfig(speaker_labels=True, language_code="en")
139
- transcript_iter = aai.Transcriber().transcribe(str(audio_path), config=config)
 
 
 
 
 
 
 
140
  transcript = transcript_iter.text
 
141
 
142
  # Process each type sequentially
143
  sections = {}
144
- for key in ["titles_and_thumbnails", "description", "previews", "clips", "timestamps"]:
145
- result = await self.generator.generate_content(ContentRequest(key), transcript)
146
- sections[key] = result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  # Combine into markdown with H2 headers
149
  markdown = f"""
@@ -170,16 +227,18 @@ class TranscriptProcessor:
170
  return markdown
171
 
172
  except Exception as e:
 
 
 
173
  return f"Error processing input: {str(e)}"
174
 
175
  def update_prompts(self, *values) -> str:
176
  """Update the current session's prompts."""
177
- self.generator.current_prompts.update(zip(
178
- ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"],
179
- values
180
- ))
181
- return "Prompts updated for this session!"
182
-
183
 
184
 
185
  def create_interface():
@@ -190,13 +249,29 @@ def create_interface():
190
  gr.Markdown(
191
  """
192
  # Gemini Podcast Content Generator
193
- Generate preview clips, timestamps, descriptions and more from an audio file using Gemini.
194
 
195
- Simply upload an audio file to get started and Gemini handles the rest.
196
  """
197
  )
198
 
199
- with gr.Tab("Generate Content"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  input_audio = gr.File(
201
  label="Upload Audio File",
202
  file_count="single",
@@ -204,31 +279,57 @@ def create_interface():
204
  )
205
  submit_btn = gr.Button("Generate Content with Gemini")
206
 
207
- output = gr.Markdown() # Single markdown output
208
 
209
- async def process_wrapper(text):
 
210
  print("Process wrapper started")
211
- print(f"Input text: {text[:100]}...")
 
 
 
 
 
 
 
212
 
213
  try:
214
- result = await processor.process_transcript(text)
215
- print("Process completed, got results")
216
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  except Exception as e:
218
- print(f"Error in process_wrapper: {str(e)}")
219
- return f"# Error\n\n{str(e)}"
 
 
 
220
 
 
221
  submit_btn.click(
222
  fn=process_wrapper,
223
- inputs=input_audio,
 
224
  outputs=output,
225
- queue=True
226
  )
227
 
228
  with gr.Tab("Customize Prompts"):
229
  gr.Markdown(
230
  """
231
- ## Customize Generation Prompts
232
  Here you can experiment with different prompts during your session.
233
  Changes will remain active until you reload the page.
234
 
@@ -236,41 +337,45 @@ def create_interface():
236
  """
237
  )
238
 
 
 
239
  prompt_inputs = [
240
  gr.Textbox(
241
  label=f"{key.replace('_', ' ').title()} Prompt",
242
  lines=10,
243
- value=processor.generator.current_prompts[key]
244
  )
245
- for key in [
246
- "previews",
247
- "clips",
248
- "description",
249
- "timestamps",
250
- "titles_and_thumbnails"
251
- ]
252
  ]
253
  status = gr.Textbox(label="Status", interactive=False)
254
 
255
- # Update prompts when they change
256
- for prompt in prompt_inputs:
257
- prompt.change(
258
- fn=processor.update_prompts,
259
- inputs=prompt_inputs,
260
- outputs=[status]
261
- )
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- # Reset button
264
- reset_btn = gr.Button("Reset to Default Prompts")
265
  reset_btn.click(
266
- fn=lambda: (
267
- processor.update_prompts(*processor.generator.current_prompts.values()),
268
- *processor.generator.current_prompts.values(),
269
- ),
270
- outputs=[status] + prompt_inputs,
271
  )
272
 
273
  return app
274
 
275
  if __name__ == "__main__":
276
- create_interface().launch()
 
 
2
  import asyncio
3
  from pathlib import Path
4
  from google import genai
5
+ from google.genai import types # Import types for error handling
6
  import os
7
  from dataclasses import dataclass
8
  from typing import Dict
 
17
  prompt_key: str
18
 
19
  class ContentGenerator:
20
+ # Modified __init__ slightly - allow api_key=None initially
21
+ def __init__(self, api_key=None):
22
  self.current_prompts = self._load_default_prompts()
23
+ # Initialize client only if key is provided, otherwise set to None
24
+ self.client = genai.Client(api_key=api_key) if api_key else None
25
 
26
  def _load_default_prompts(self) -> Dict[str, str]:
27
  """Load default prompts and examples from files and CSVs."""
 
64
  # Load base prompts and inject examples
65
  prompts = {}
66
  for key in ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]:
67
+ try: # Add try-except for file reading
68
+ prompt = Path(f"prompts/{key}.txt").read_text()
69
+
70
+ # Inject relevant examples
71
+ if key == "timestamps":
72
+ prompt = prompt.replace("{timestamps_examples}", timestamp_examples)
73
+ elif key == "titles_and_thumbnails":
74
+ prompt = prompt.replace("{title_examples}", title_examples)
75
+ elif key == "description":
76
+ prompt = prompt.replace("{description_examples}", description_examples)
77
+ elif key == "clips":
78
+ prompt = prompt.replace("{clip_examples}", clip_examples)
79
+
80
+ prompts[key] = prompt
81
+ except FileNotFoundError:
82
+ print(f"Warning: Prompt file prompts/{key}.txt not found. Using empty prompt.")
83
+ prompts[key] = "" # Use empty prompt if file is missing
84
+ except Exception as e:
85
+ print(f"Warning: Error loading prompt file prompts/{key}.txt: {e}")
86
+ prompts[key] = ""
87
 
88
  return prompts
89
 
90
  async def generate_content(self, request: ContentRequest, transcript: str) -> str:
91
  """Generate content using Gemini asynchronously."""
92
+ # Check if client is initialized
93
+ if not self.client:
94
+ return f"Error: Google AI Client not initialized. Please provide an API key."
95
  try:
96
  print(f"\nFull prompt for {request.prompt_key}:")
97
  print("=== SYSTEM PROMPT ===")
98
+ # Ensure prompt exists
99
+ system_prompt = self.current_prompts.get(request.prompt_key, "")
100
+ if not system_prompt:
101
+ print(f"Warning: Empty system prompt for {request.prompt_key}")
102
+ print(system_prompt)
103
  print("=== END SYSTEM PROMPT ===\n")
104
 
105
  response = self.client.models.generate_content(
106
  model="gemini-2.5-pro-exp-03-25",
107
+ config=types.GenerateContentConfig(system_instruction=system_prompt),
108
  contents=transcript
109
  )
110
 
111
+ if response and hasattr(response, 'text'): # Simpler check for Gemini API response
112
  return response.text
113
  else:
114
+ # Try to get more details if possible
115
+ error_details = getattr(response, 'prompt_feedback', 'Unknown reason')
116
+ print(f"Unexpected Gemini response structure for {request.prompt_key}. Response: {response}")
117
+ return f"Error: Unexpected response structure for {request.prompt_key}. Details: {error_details}"
118
+
119
+ except types.PermissionDeniedError as e:
120
+ print(f"Permission Denied Error generating content for {request.prompt_key}: {e}")
121
+ return f"Error generating content: Permission Denied. Please check your Google API Key. Details: {str(e)}"
122
  except Exception as e:
123
+ print(f"Error generating content for {request.prompt_key}: {e}")
124
  return f"Error generating content: {str(e)}"
125
 
126
  def extract_video_id(url: str) -> str:
 
141
 
142
  class TranscriptProcessor:
143
  def __init__(self):
144
+ # Initialize generator without API key initially
145
+ self.generator = ContentGenerator(api_key=None) # No key needed at init
146
 
147
 
148
  def _get_youtube_transcript(self, url: str) -> str:
 
154
  except Exception as e:
155
  raise Exception(f"Error fetching YouTube transcript: {str(e)}")
156
 
157
+ # Modify process_transcript to accept the AssemblyAI key
158
+ async def process_transcript(self, audio_file, assemblyai_api_key: str):
159
  """Process input and generate all content."""
160
+ if not audio_file:
161
+ raise ValueError("No audio file provided.")
162
+ if not assemblyai_api_key:
163
+ raise ValueError("AssemblyAI API Key is required.")
164
+
165
+ audio_path = Path(audio_file.name) # Use Path object
166
+ if not audio_path.exists():
167
+ raise FileNotFoundError(f"Audio file not found at path: {audio_path}")
168
+
169
  try:
170
+ # Set AssemblyAI key just before use
171
+ aai.settings.api_key = assemblyai_api_key
172
+ print(f"Transcribing file: {audio_path}")
173
  config = aai.TranscriptionConfig(speaker_labels=True, language_code="en")
174
+ transcriber = aai.Transcriber()
175
+ transcript_iter = transcriber.transcribe(str(audio_path), config=config) # Ensure path is string
176
+
177
+ if transcript_iter.error:
178
+ raise Exception(f"AssemblyAI Transcription Error: {transcript_iter.error}")
179
+ if not transcript_iter.text:
180
+ return "Error: Transcription resulted in empty text."
181
+
182
  transcript = transcript_iter.text
183
+ print("Transcription successful.")
184
 
185
  # Process each type sequentially
186
  sections = {}
187
+ tasks = []
188
+ keys = ["titles_and_thumbnails", "description", "previews", "clips", "timestamps"]
189
+
190
+ print("Starting content generation tasks...")
191
+ # Create concurrent tasks for Gemini generation
192
+ for key in keys:
193
+ tasks.append(asyncio.create_task(
194
+ self.generator.generate_content(ContentRequest(key), transcript)
195
+ ))
196
+
197
+ # Wait for all tasks to complete
198
+ results = await asyncio.gather(*tasks)
199
+ print("Content generation tasks completed.")
200
+
201
+ # Assign results back to sections
202
+ for i, key in enumerate(keys):
203
+ sections[key] = results[i]
204
 
205
  # Combine into markdown with H2 headers
206
  markdown = f"""
 
227
  return markdown
228
 
229
  except Exception as e:
230
+ # Log the full traceback for debugging
231
+ import traceback
232
+ print(f"Error during transcript processing: {traceback.format_exc()}")
233
  return f"Error processing input: {str(e)}"
234
 
235
  def update_prompts(self, *values) -> str:
236
  """Update the current session's prompts."""
237
+ keys = ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]
238
+ self.generator.current_prompts.update(zip(keys, values))
239
+ # Check if all keys were updated correctly
240
+ updated_keys_str = ", ".join(k for k, v in zip(keys, values) if v is not None)
241
+ return f"Prompts updated for this session: {updated_keys_str}"
 
242
 
243
 
244
  def create_interface():
 
249
  gr.Markdown(
250
  """
251
  # Gemini Podcast Content Generator
252
+ Generate preview clips, timestamps, descriptions and more from podcast transcripts using Gemini.
253
 
254
+ **Important:** Enter your API keys below before uploading your audio file.
255
  """
256
  )
257
 
258
+ with gr.Tab("Generate Content with Gemini"):
259
+ # --- ADDED API KEY INPUTS ---
260
+ google_api_key_input = gr.Textbox(
261
+ label="Google API Key",
262
+ placeholder="Enter your Google AI Studio API Key here (e.g., AIza...)",
263
+ type="password",
264
+ info="Your GCP account needs to have billing enabled to use the 2.5 pro model.",
265
+ # value=os.getenv("GOOGLE_API_KEY", "") # Optionally preload from env if available
266
+ )
267
+ assemblyai_api_key_input = gr.Textbox(
268
+ label="AssemblyAI API Key",
269
+ placeholder="Enter your AssemblyAI API Key here",
270
+ type="password",
271
+ # value=os.getenv("ASSEMBLYAI_API_KEY", "") # Optionally preload from env if available
272
+ )
273
+ # --- END OF ADDED INPUTS ---
274
+
275
  input_audio = gr.File(
276
  label="Upload Audio File",
277
  file_count="single",
 
279
  )
280
  submit_btn = gr.Button("Generate Content with Gemini")
281
 
282
+ output = gr.Markdown(label="Generated Content") # Added label
283
 
284
+ # Modify the wrapper function signature to accept API keys
285
async def process_wrapper(google_key, assemblyai_key, audio_file_obj):
    """Gradio click handler that streams progress and then the final result.

    This is an async generator: each ``yield`` pushes a new value to the
    output component. It installs a Google client built from ``google_key``
    on ``processor.generator`` and then delegates to
    ``processor.process_transcript``.

    Args:
        google_key: Google API key typed into the UI.
        assemblyai_key: AssemblyAI API key typed into the UI.
        audio_file_obj: Uploaded file object from the file input.

    Yields:
        ``gr.update`` objects carrying status text, then either the result
        or a markdown-formatted error message.
    """
    import traceback

    def _mask(key):
        # Log at most the last four characters of a secret; tolerate None.
        key = key or ""
        return "*" * (len(key) - 4) + key[-4:] if len(key) > 4 else "****"

    print("Process wrapper started")
    print(f"Received Google Key: {_mask(google_key)}")
    print(f"Received AssemblyAI Key: {_mask(assemblyai_key)}")
    print(f"Audio file object received: Name='{getattr(audio_file_obj, 'name', 'N/A')}'")

    # Show processing message
    yield gr.update(value="Processing... Setting up clients and starting transcription...")

    try:
        # NOTE(review): ``processor`` comes from the enclosing scope; if it is
        # shared between sessions, this assignment replaces the client (and
        # key) for every user - confirm.
        print("Initializing Google Client...")
        processor.generator.client = genai.Client(api_key=google_key)
        print("Google client initialized.")

        yield gr.update(value="Processing... Transcribing audio with AssemblyAI...")
        result = await processor.process_transcript(audio_file_obj, assemblyai_key)
        print("Process completed, returning results.")
        yield gr.update(value=result)  # Final update with the result

    except Exception as e:
        # Detect permission errors by inspecting the exception rather than
        # naming an SDK class in the ``except`` clause: a wrong class name
        # there raises while handling the original error and hides it.
        # NOTE(review): assumes google-genai API errors expose ``code`` /
        # ``status`` - confirm.
        if getattr(e, "code", None) == 403 or getattr(e, "status", None) == "PERMISSION_DENIED":
            error_msg = f"# Error\n\nPermission Denied: Please check your Google API Key. Details: {str(e)}"
            print(error_msg)
        else:
            # Log the full traceback for debugging
            print(f"Error in process_wrapper: {traceback.format_exc()}")
            error_msg = f"# Error\n\nAn unexpected error occurred: {str(e)}"
        yield gr.update(value=error_msg)
319
 
320
+ # Modify the submit_btn.click inputs to include the API key textboxes
321
  submit_btn.click(
322
  fn=process_wrapper,
323
+ # Order matters: matches the function signature (google_key, assemblyai_key, audio_file_obj)
324
+ inputs=[google_api_key_input, assemblyai_api_key_input, input_audio],
325
  outputs=output,
326
+ # queue is left at its default: generator (yield-based) handlers stream their updates through Gradio's queue, so it must not be disabled here
327
  )
328
 
329
  with gr.Tab("Customize Prompts"):
330
  gr.Markdown(
331
  """
332
+ ## Customize Generation Prompts for Gemini
333
  Here you can experiment with different prompts during your session.
334
  Changes will remain active until you reload the page.
335
 
 
337
  """
338
  )
339
 
340
+ # Use the keys defined earlier for consistency
341
+ prompt_keys = ["previews", "clips", "description", "timestamps", "titles_and_thumbnails"]
342
  prompt_inputs = [
343
  gr.Textbox(
344
  label=f"{key.replace('_', ' ').title()} Prompt",
345
  lines=10,
346
+ value=processor.generator.current_prompts.get(key, "") # Use .get for safety
347
  )
348
+ for key in prompt_keys
 
 
 
 
 
 
349
  ]
350
  status = gr.Textbox(label="Status", interactive=False)
351
 
352
+ # --- Simplified Update Logic ---
353
+ update_btn = gr.Button("Update Session Prompts")
354
+ update_btn.click(
355
+ fn=processor.update_prompts,
356
+ inputs=prompt_inputs,
357
+ outputs=[status]
358
+ )
359
+ # --- End Simplified Update Logic ---
360
+
361
+
362
+ # Reset button - fetches defaults again
363
+ reset_btn = gr.Button("Reset to Default Gemini Prompts")
364
+ # Define a helper function for reset to avoid complex lambda
365
def reset_prompts_ui():
    """Reload the default prompts and return fresh values for the UI.

    The generator's current prompts are replaced with the reloaded defaults.

    Returns:
        A list whose first item is the status message, followed by one
        prompt text per entry of ``prompt_keys`` (empty string when a key
        has no default), matching the order of the click handler's outputs.
    """
    generator = processor.generator
    defaults = generator._load_default_prompts()
    generator.current_prompts = defaults  # keep internal state in sync with the UI
    ui_values = ["Prompts reset to defaults!"]
    ui_values.extend(defaults.get(name, "") for name in prompt_keys)
    return ui_values
370
 
 
 
371
  reset_btn.click(
372
+ fn=reset_prompts_ui,
373
+ inputs=None, # No inputs needed
374
+ outputs=[status] + prompt_inputs # Update status and all textboxes
 
 
375
  )
376
 
377
  return app
378
 
379
  if __name__ == "__main__":
380
+ app = create_interface()
381
+ app.launch()