import os import re from urllib.parse import urlparse from bs4 import BeautifulSoup AUDIO_DIR = "audio_outputs" voice_map = {'grandma GG': 'rKVm0Cb9J2wrzmZupJea', 'tech wizard': 'ocn9CucaUfmmP6Two6Ik', 'perky sidekick': 'DWR3ijzKmphlRUhbBI7t', 'bill the newscaster': 'R1vZMopVRO75M5xBKX52', 'spunky charlie': 'q3yXDjF0aq4JCEo9u2g4', 'sassy teen': 'mBj2IDD9aXruPJHLGCAv'} def sanitize_url(url): if not url.startswith(("http://", "https://")): return "https://" + url return url def extract_internal_links(html_content, base_url): soup = BeautifulSoup(html_content, "html.parser") parsed_base = urlparse(base_url) base_domain = parsed_base.netloc links = set() for tag in soup.find_all("a", href=True): href = tag["href"] parsed_href = urlparse(href) if parsed_href.netloc == "" or parsed_href.netloc == base_domain: full_url = parsed_href.geturl() if not full_url.startswith("http"): full_url = f"{parsed_base.scheme}://{base_domain}{href}" links.add(full_url) return list(links) def crawl_documentation(url): import requests try: response = requests.get(url, timeout=10) response.raise_for_status() return response.text except Exception as e: return f"Error fetching page: {e}" def get_voice_prompt_style(voice): tone = {'grandma GG': 'dry, witty, and brutally honest — will roast you if you mess up.', 'tech wizard': 'cryptic, snarky, and a prodigy with code — speaks in digital spells.', 'perky sidekick': 'energetic, cheerful, and endlessly supportive — like a high-five machine.', 'bill the newscaster': 'polished, confident, and composed — delivers everything like breaking news.', 'spunky charlie': 'wildly curious, playful, and full of devil-may-care energy.', 'sassy teen': 'sarcastic, sharp-tongued, and too cool to care — flexes brainpower with attitude.'} return tone.get(voice.lower(), "neutral") def save_audio_file(audio_path, content): os.makedirs(AUDIO_DIR, exist_ok=True) with open(audio_path, "wb") as f: f.write(content) __all__ = [ "sanitize_url", "extract_internal_links", "crawl_documentation", "get_voice_prompt_style", "save_audio_file", "voice_map", "AUDIO_DIR", ]