"""Gradio app that turns a podcast transcript (or YouTube URL) into Twitter clips,
an episode description, timestamps, and title/thumbnail suggestions using Claude."""

import asyncio
import concurrent.futures
import re
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Tuple

import anthropic
import gradio as gr
import pandas as pd
from youtube_transcript_api import YouTubeTranscriptApi

# The Anthropic client reads its API key from the ANTHROPIC_API_KEY environment variable.
client = anthropic.Anthropic()


@dataclass
class ContentRequest:
    prompt_key: str
    max_tokens: int = 2000
    temperature: float = 0.6


class TranscriptProcessor:
    def __init__(self):
        self.current_prompts = self._load_default_prompts()

    def _load_default_prompts(self) -> Dict[str, str]:
        """Load default prompts from files."""
        return {
            key: Path(f"prompts/{key}.txt").read_text()
            for key in ["clips", "description", "timestamps", "titles_and_thumbnails"]
        }

    def _load_examples(self, filename: str, columns: List[str]) -> str:
        """Load examples from CSV file."""
        try:
            df = pd.read_csv(f"data/{filename}")
            if len(columns) == 1:
                return "\n\n".join(df[columns[0]].dropna().tolist())

            examples = []
            for _, row in df.iterrows():
                if all(pd.notna(row[col]) for col in columns):
                    example = "\n".join(f"{col}: {row[col]}" for col in columns)
                    examples.append(example)
            return "\n\n".join(examples)
        except Exception as e:
            print(f"Error loading {filename}: {str(e)}")
            return ""

    async def _generate_content(self, request: ContentRequest, transcript: str) -> str:
        """Generate content using Claude asynchronously."""
        print(f"Starting {request.prompt_key} generation...")
        start_time = time.time()

        # Each prompt key maps to a CSV of past examples that gets appended to the system prompt.
        example_configs = {
            "clips": ("Viral Twitter Clips.csv", ["Tweet Text", "Clip Transcript"]),
            "description": ("Viral Episode Descriptions.csv", ["Tweet Text"]),
            "timestamps": ("Timestamps.csv", ["Timestamps"]),
            "titles_and_thumbnails": ("Titles & Thumbnails.csv", ["Titles", "Thumbnail"]),
        }

        full_prompt = self.current_prompts[request.prompt_key]
        if config := example_configs.get(request.prompt_key):
            if examples := self._load_examples(*config):
                full_prompt += f"\n\nPrevious examples:\n{examples}"

        # The Anthropic SDK call is blocking, so run it in a worker thread to keep the
        # event loop free for the other concurrent generations.
        loop = asyncio.get_running_loop()
        with concurrent.futures.ThreadPoolExecutor() as pool:
            message = await loop.run_in_executor(
                pool,
                lambda: client.messages.create(
                    model="claude-3-5-sonnet-20241022",
                    max_tokens=request.max_tokens,
                    temperature=request.temperature,
                    system=full_prompt,
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": f"Process this transcript:\n\n{transcript}"}
                            ],
                        }
                    ],
                ),
            )

        result = message.content[0].text
        print(f"Finished {request.prompt_key} in {time.time() - start_time:.2f} seconds")
        return result

    def _get_youtube_transcript(self, url: str) -> str:
        """Get transcript from YouTube URL."""
        try:
            video_id = re.search(
                r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([A-Za-z0-9_-]+)",
                url,
            ).group(1)
            transcript = YouTubeTranscriptApi.list_transcripts(video_id).find_transcript(["en"])
            return " ".join(entry["text"] for entry in transcript.fetch())
        except Exception as e:
            raise Exception(f"Error fetching YouTube transcript: {str(e)}")

    async def process_transcript(self, input_text: str) -> Tuple[str, str, str, str]:
        """Process input and generate all content."""
        try:
            # Accept either a YouTube URL or a raw transcript pasted directly.
            transcript = (
                self._get_youtube_transcript(input_text)
                if any(x in input_text for x in ["youtube.com", "youtu.be"])
                else input_text
            )

            requests = [
                ContentRequest("clips", max_tokens=8192),
                ContentRequest("description"),
                ContentRequest("timestamps", temperature=0.4),
                ContentRequest("titles_and_thumbnails", temperature=0.7),
            ]

            # Generate all four outputs concurrently.
            results = await asyncio.gather(
                *[self._generate_content(req, transcript) for req in requests]
            )
            return tuple(results)

        except Exception as e:
            return (f"Error processing input: {str(e)}",) * 4

    def update_prompts(self, *values) -> str:
        """Update the current session's prompts."""
        keys = ["clips", "description", "timestamps", "titles_and_thumbnails"]
        self.current_prompts = dict(zip(keys, values))
        return "Prompts updated for this session! Changes will reset when you reload the page."


def create_interface():
    """Create the Gradio interface."""
    processor = TranscriptProcessor()

    with gr.Blocks(title="Podcast Transcript Analyzer") as app:
        with gr.Tab("Generate Content"):
            gr.Markdown("# Podcast Content Generator")
            input_text = gr.Textbox(label="Input", placeholder="YouTube URL or transcript...", lines=10)
            submit_btn = gr.Button("Generate Content")
            outputs = [
                gr.Textbox(label=label, lines=10, interactive=False)
                for label in ["Twitter Clips", "Twitter Description", "Timestamps", "Title & Thumbnail Suggestions"]
            ]

            async def process_wrapper(text):
                return await processor.process_transcript(text)

            submit_btn.click(fn=process_wrapper, inputs=[input_text], outputs=outputs)

        with gr.Tab("Experiment with Prompts"):
            gr.Markdown("# Experiment with Prompts")
            gr.Markdown(
                """
                Here you can experiment with different prompts during your session.
                Changes will remain active until you reload the page.

                Tip: Copy your preferred prompts somewhere safe if you want to reuse them later!
                """
            )

            prompt_inputs = [
                gr.Textbox(label="Clips Prompt", lines=10, value=processor.current_prompts["clips"]),
                gr.Textbox(label="Description Prompt", lines=10, value=processor.current_prompts["description"]),
                gr.Textbox(label="Timestamps Prompt", lines=10, value=processor.current_prompts["timestamps"]),
                gr.Textbox(label="Titles & Thumbnails Prompt", lines=10, value=processor.current_prompts["titles_and_thumbnails"]),
            ]
            status = gr.Textbox(label="Status", interactive=False)

            # Any edit to a prompt box updates the session prompts.
            for prompt in prompt_inputs:
                prompt.change(fn=processor.update_prompts, inputs=prompt_inputs, outputs=[status])

            # Reload the on-disk defaults so the reset button actually restores them,
            # rather than re-applying whatever the session prompts currently are.
            def reset_prompts():
                defaults = processor._load_default_prompts()
                return (processor.update_prompts(*defaults.values()), *defaults.values())

            reset_btn = gr.Button("Reset to Default Prompts")
            reset_btn.click(fn=reset_prompts, outputs=[status] + prompt_inputs)

    return app


if __name__ == "__main__":
    create_interface().launch()
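
# Minimal local-run sketch (assumptions: this file is saved as app.py, a prompts/
# directory next to it contains clips.txt, description.txt, timestamps.txt, and
# titles_and_thumbnails.txt, and ANTHROPIC_API_KEY is set in the environment; the
# data/*.csv example files are optional, since missing ones are simply skipped):
#
#   python app.py
#
# Gradio then prints a local URL where you can paste a YouTube link or a raw transcript.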