import re

from bs4 import BeautifulSoup

# NOTE(review): the copy of this file that was reviewed had been passed
# through an HTML renderer, which swallowed every tag-like string literal
# (they showed up as raw line breaks or vanished entirely). The values below
# are reconstructions and MUST be confirmed against the original source:
#   * the <span ...>...</span> pattern is the only reading consistent with
#     the surviving "]*>(.*?)" fragment;
#   * the line-break tag is matched in all common spellings because
#     Beautiful Soup serialises it as "<br/>";
#   * the wrapper tags and the non-breaking-space forms are best guesses.
_BR_TAG_RE = re.compile(r"<br\s*/?>")
_SPAN_RE = re.compile(r"<span[^>]*>(.*?)</span>")
_WRAPPER_TAGS = ("<div>", "</div>")
_NBSP_FORMS = ("&nbsp;", "\xa0")


def _remove_all(text, fragments):
    """Return *text* with every occurrence of each fragment removed."""
    for fragment in fragments:
        text = text.replace(fragment, "")
    return text


def _item_key(item):
    """Return the stripped text of the span inside the item's key div.

    Falls back to "" when either the ``component-key`` div or its span is
    missing.
    """
    key_div = item.find("div", class_="component-key")
    key_span = key_div.find("span") if key_div is not None else None
    return key_span.get_text(strip=True) if key_span is not None else ""


def parse_html_prompt(input_str: str) -> dict:
    """Parse an HTML prompt description into template, components and self info.

    Args:
        input_str: HTML markup. The first ``<p>`` element holds the template;
            ``div.component-item`` and ``div.self-info-item`` elements each
            hold a ``div.component-key`` (with a ``<span>``) and a
            ``div.component-value``.

    Returns:
        A dict with three keys:

        * ``'template'``: inner HTML of the first ``<p>`` with line-break
          tags turned into newlines and each ``<span>`` rewritten as
          ``<content>``; ``""`` when there is no ``<p>``.
        * ``'components'``: key -> inner HTML of the value div, with each
          ``<span>`` rewritten as ``{content}``.
        * ``'self_prompt'``: key -> plain text of the value div.

        Items sharing a key overwrite earlier ones; a missing key or value
        element yields ``""`` instead of raising.
    """
    soup = BeautifulSoup(input_str, "html.parser")

    # Handle the content inside <p>.
    paragraph = soup.find("p")
    if paragraph is None:
        template = ""
    else:
        p_content = _BR_TAG_RE.sub("\n", paragraph.decode_contents())
        p_content = _SPAN_RE.sub(r"<\1>", p_content)
        template = _remove_all(p_content.strip(), _WRAPPER_TAGS + _NBSP_FORMS)

    # Parse component-item entries; values keep their markup.
    components = {}
    for item in soup.find_all("div", class_="component-item"):
        value_div = item.find("div", class_="component-value")
        value_content = value_div.decode_contents() if value_div is not None else ""
        value_content = _SPAN_RE.sub(r"{\1}", value_content)
        components[_item_key(item)] = _remove_all(
            value_content.strip(), _WRAPPER_TAGS
        )

    # Parse self-info-item entries; values are reduced to plain text.
    self_prompt = {}
    for item in soup.find_all("div", class_="self-info-item"):
        value_div = item.find("div", class_="component-value")
        value = value_div.get_text(strip=True) if value_div is not None else ""
        self_prompt[_item_key(item)] = _remove_all(value, _WRAPPER_TAGS)

    return {
        'template': template,
        'components': components,
        'self_prompt': self_prompt,
    }


# Example usage: print(parse_html_prompt(html_string))