Spaces:
Sleeping
Sleeping
import io | |
import re | |
import os | |
import glob | |
import asyncio | |
import hashlib | |
import base64 | |
import unicodedata | |
import streamlit as st | |
from PIL import Image | |
import fitz | |
import edge_tts | |
from reportlab.lib.pagesizes import A4 | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.lib import colors | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.pdfbase.ttfonts import TTFont | |
# Use the full browser width for the PDF preview and keep the options
# sidebar hidden until the user opens it.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
)
def get_file_title_from_markdown(markdown_text):
    """Derive a file-name-safe title from the first usable markdown header.

    Args:
        markdown_text: Markdown source to scan line by line.

    Returns:
        The text of the first header line (any line starting with ``#``)
        that still contains something after punctuation is removed, with
        its words joined by single underscores. Returns ``"output"`` when
        no such header exists.
    """
    for line in markdown_text.splitlines():
        stripped = line.strip()
        if not stripped.startswith("#"):
            continue
        # Keep only word characters, whitespace and hyphens.
        cleaned = re.sub(r'[^\w\s-]', '', stripped.lstrip("#"))
        # split() drops leading/trailing whitespace and collapses runs, so
        # removed punctuation cannot leave "Hi_" or "A__B" behind, and a
        # header that was only punctuation falls through to the next one.
        words = cleaned.split()
        if words:
            return "_".join(words)
    return "output"
async def generate_audio(text, voice, markdown_text):
    """Synthesize *text* with edge-tts and save it as an MP3 file.

    Args:
        text: Text to speak.
        voice: edge-tts voice name passed to ``edge_tts.Communicate``.
        markdown_text: Markdown whose header title names the output file.

    Returns:
        The name of the written file, ``"<title>.mp3"``.
    """
    output_name = f"{get_file_title_from_markdown(markdown_text)}.mp3"
    speech = edge_tts.Communicate(text, voice)
    await speech.save(output_name)
    return output_name
def get_download_link(file, file_type="mp3"):
    """Build an HTML anchor that downloads *file* from an inline data URI.

    Args:
        file: Path of the file to embed; it is read fully into memory.
        file_type: ``"mp3"`` or ``"pdf"``; any other value is served as
            ``application/octet-stream``.

    Returns:
        An ``<a>`` element string whose ``href`` is a base64 data URI and
        whose ``download`` attribute and link text are the file's base name.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import html

    mime_by_type = {"mp3": "audio/mpeg", "pdf": "application/pdf"}
    mime = mime_by_type.get(file_type, "application/octet-stream")

    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    # The link is rendered with unsafe_allow_html=True, so a file name
    # containing quotes, '&' or '<' must not be able to break the markup.
    safe_name = html.escape(os.path.basename(file), quote=True)
    return f'<a href="data:{mime};base64,{b64}" download="{safe_name}">Download {safe_name}</a>'
def apply_emoji_font(text, font_name):
    """Wrap every run of *text* in a ``<font face=...>`` tag.

    The text is cut at runs of emoji characters. Each emoji run is
    NFC-normalized and wrapped in its own tag; each non-empty stretch of
    text between runs is wrapped unchanged. Both kinds of segment use
    *font_name*.

    Args:
        text: Paragraph markup to process.
        font_name: Font face written into every generated tag.

    Returns:
        The concatenated ``<font>``-wrapped segments ('' for empty input).
    """
    emoji_pattern = re.compile(
        r"([\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FBA0-\U0001FBAF"  # additional range if needed
        r"\U0001F9C0-\U0001F9C2"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+)"
    )

    def wrap(fragment):
        return f'<font face="{font_name}">{fragment}</font>'

    # With a capturing group, split() alternates plain text (even indexes)
    # and emoji runs (odd indexes).
    pieces = emoji_pattern.split(text)
    wrapped = []
    for position, piece in enumerate(pieces):
        if position % 2 == 1:
            wrapped.append(wrap(unicodedata.normalize('NFC', piece)))
        elif piece:
            wrapped.append(wrap(piece))
    return ''.join(wrapped)
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
    """Turn markdown text into a list of paragraph-markup lines.

    Blank lines and top-level ``# `` headers are dropped. Optionally,
    ``**bold**`` spans become ``<b>`` tags and lines starting with a
    ``N. `` prefix are wrapped in ``<b>`` as a whole.

    Args:
        markdown_text: Markdown source.
        render_with_bold: Convert ``**text**`` markers to ``<b>text</b>``.
        auto_bold_numbers: Wrap numbered lines entirely in ``<b>...</b>``.

    Returns:
        A ``(lines, count)`` tuple, where ``count == len(lines)``.
    """
    numbered_prefix = re.compile(r'^\d+\.\s')
    rendered = []
    for raw_line in markdown_text.strip().split('\n'):
        entry = raw_line.strip()
        if entry == "" or entry.startswith('# '):
            continue
        if render_with_bold:
            # Replace markdown bold markers with HTML bold tags.
            entry = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', entry)
        if auto_bold_numbers and numbered_prefix.match(entry):
            already_wrapped = entry.startswith("<b>") and entry.endswith("</b>")
            if not already_wrapped:
                entry = f"<b>{entry}</b>"
        rendered.append(entry)
    return rendered, len(rendered)
def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, emoji_font):
    """Render markdown text as a multi-column PDF on a double-width A4 page.

    Args:
        markdown_text: Markdown source, converted to lines by
            ``markdown_to_pdf_content``.
        base_font_size: Font size in points for regular lines.
        render_with_bold: Passed through to ``markdown_to_pdf_content``.
        auto_bold_numbers: Passed through to ``markdown_to_pdf_content``.
        enlarge_numbered: Use ``base_font_size + 1`` for bold numbered lines.
        num_columns: Number of side-by-side columns (values below 1 are
            treated as 1).
        emoji_font: Name the primary font is registered under. The file
            ``<emoji_font>.ttf`` in the current directory is used when
            present, otherwise the first ``*.ttf`` file found.

    Returns:
        The PDF document as ``bytes``, or ``None`` if no font file is
        available or font registration fails (reported via ``st.error``).
    """
    margin = 36
    spacer_height = 10
    bold_font = "NotoEmoji-Bold"
    page_width = A4[0] * 2
    page_height = A4[1]
    # A column count below 1 would leave no column to put content in.
    num_columns = max(1, num_columns)

    # Register fonts before any style or paragraph refers to them.
    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found in the current directory.")
            return
        # Register the selected font against its own file, not just the
        # first .ttf that glob happens to return.
        files_by_name = {
            os.path.splitext(os.path.basename(path))[0]: path
            for path in available_font_files
        }
        primary_font_file = files_by_name.get(emoji_font, available_font_files[0])
        pdfmetrics.registerFont(TTFont(emoji_font, primary_font_file))

        selected_bold_font = next(
            (path for path in available_font_files if bold_font in path), None
        )
        if selected_bold_font:
            pdfmetrics.registerFont(TTFont(bold_font, selected_bold_font))
        else:
            # Without the bold font file, fall back to the primary font so
            # bold lines do not reference an unregistered font name.
            bold_font = emoji_font
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return

    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)

    # Define paragraph styles.
    styles = getSampleStyleSheet()
    numbered_size = base_font_size + 1 if enlarge_numbered else base_font_size
    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName=emoji_font,
        fontSize=base_font_size, leading=base_font_size * 1.15, spaceAfter=1
    )
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=bold_font,
        fontSize=numbered_size, leading=numbered_size * 1.15, spaceAfter=1
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName=emoji_font,
        textColor=colors.darkblue, fontSize=base_font_size * 1.1,
        leading=base_font_size * 1.32, spaceAfter=2
    )

    # Split content into columns: fill a column until it holds its share of
    # the lines, then move on; the last column takes whatever remains.
    columns = [[] for _ in range(num_columns)]
    lines_per_column = total_lines / num_columns
    current_line_count = 0
    current_column = 0
    for item in pdf_content:
        if current_line_count >= lines_per_column and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    number_pattern = re.compile(r'^\d+\.\s')
    column_cells = [[] for _ in range(num_columns)]
    for col_idx, column in enumerate(columns):
        for item in column:
            # Only treat the line as bold when one <b>...</b> pair wraps the
            # whole line; "<b>a</b> x <b>b</b>" must keep its inline tags
            # rather than being sliced into unbalanced markup.
            fully_bold = (
                isinstance(item, str)
                and item.startswith("<b>")
                and item.endswith("</b>")
                and item.count("<b>") == 1
            )
            if fully_bold:
                content = item[3:-4].strip()
                style = numbered_bold_style if number_pattern.match(content) else section_style
                column_cells[col_idx].append(Paragraph(apply_emoji_font(content, bold_font), style))
            else:
                # NOTE(review): apply_emoji_font wraps text between emoji in
                # separate <font> tags, so an emoji inside an inline <b> span
                # would split that span across tags - verify with such input.
                column_cells[col_idx].append(Paragraph(apply_emoji_font(item, emoji_font), item_style))

    # Pad every column to the same height. Always keep at least one row so
    # empty markdown still yields a table with content, and give each padding
    # cell its own Paragraph instead of sharing one flowable object.
    max_cells = max(1, max(len(cells) for cells in column_cells))
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

    col_width = (page_width - 2 * margin) / num_columns
    table_data = list(zip(*column_cells))
    table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0, colors.white),
        ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
        ('LEFTPADDING', (0, 0), (-1, -1), 2),
        ('RIGHTPADDING', (0, 0), (-1, -1), 2),
        ('TOPPADDING', (0, 0), (-1, -1), 1),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
    ]))

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height),
                            leftMargin=margin, rightMargin=margin,
                            topMargin=margin, bottomMargin=margin)
    doc.build([Spacer(1, spacer_height), table])
    return buffer.getvalue()
def pdf_to_image(pdf_bytes):
    """Rasterize every page of a PDF into a PIL image for on-screen preview.

    Args:
        pdf_bytes: The PDF document as bytes.

    Returns:
        A list with one RGB ``PIL.Image`` per page, rendered at 2x zoom, or
        ``None`` if the document cannot be opened or rendered (the error is
        reported via ``st.error``).
    """
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            zoom = fitz.Matrix(2.0, 2.0)  # built once, reused for every page
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=zoom)
                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
            return images
        finally:
            # Close the document even when a page fails to render.
            doc.close()
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
# Auto-detect the default markdown file from available .md files
# (README.md is excluded from the choices).
md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]
if md_options:
    # Seed the session with the first markdown file when nothing is loaded.
    if 'markdown_content' not in st.session_state or not st.session_state.markdown_content:
        with open(f"{md_options[0]}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
elif 'markdown_content' not in st.session_state:
    # No markdown files: start empty, but only on the first run. Resetting
    # on every rerun would discard text the user entered and applied.
    st.session_state.markdown_content = ""
# st.experimental_rerun was replaced by st.rerun in newer Streamlit releases;
# use whichever this installation provides.
_rerun = getattr(st, "rerun", None) or st.experimental_rerun

with st.sidebar:
    # --- PDF layout options -------------------------------------------------
    st.markdown("### PDF Options")
    selected_md = st.selectbox("Select Markdown File", options=md_options, index=0 if md_options else -1)
    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    font_names = list(available_font_files)
    # Use the selected emoji font from sidebar (NotoEmoji-Bold preselected when present).
    selected_font_name = st.selectbox("Select Emoji Font", options=font_names,
                                      index=font_names.index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0)
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
    num_columns = st.selectbox("Number of Columns", options=[1, 2, 3, 4, 5, 6], index=3)

    # --- Markdown editing ---------------------------------------------------
    # The selected file on disk is the source of truth: reload it every run.
    if md_options and selected_md:
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    edited_markdown = st.text_area("Modify the markdown content below:",
                                   value=st.session_state.markdown_content,
                                   height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")
    if st.button("Update PDF"):
        st.session_state.markdown_content = edited_markdown
        # Persist the edit so the reload above picks it up after the rerun.
        if md_options and selected_md:
            with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                f.write(edited_markdown)
        _rerun()
    st.download_button(label="Save Markdown", data=st.session_state.markdown_content,
                       file_name=f"{selected_md}.md" if selected_md else "default.md", mime="text/markdown")

    # --- Text-to-speech -----------------------------------------------------
    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        if st.session_state.markdown_content.strip():
            # Generate audio using the full markdown content as text.
            audio_file = asyncio.run(generate_audio(st.session_state.markdown_content, selected_voice, st.session_state.markdown_content))
            st.audio(audio_file)
            with open(audio_file, "rb") as f:
                audio_bytes = f.read()
            st.download_button("Download Audio", data=audio_bytes,
                               file_name=os.path.basename(audio_file), mime="audio/mpeg")
        else:
            st.warning("No markdown content to convert to audio.")

    # --- Save PDF to disk ---------------------------------------------------
    if st.button("Save PDF"):
        # The preview PDF is generated after this sidebar block runs, so it
        # does not exist yet here; build the PDF for saving right now.
        saved_pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold,
                                     auto_bold_numbers, enlarge_numbered, num_columns, selected_font_name)
        if saved_pdf_bytes:
            title = get_file_title_from_markdown(st.session_state.markdown_content)
            pdf_filename = f"{title}.pdf"
            with open(pdf_filename, "wb") as f:
                f.write(saved_pdf_bytes)
            st.success(f"Saved PDF as {pdf_filename}")
            _rerun()

    # --- Previously generated files -----------------------------------------
    st.markdown("### Saved Audio Files")
    mp3_files = glob.glob("*.mp3")
    for mp3 in mp3_files:
        st.audio(mp3)
        st.markdown(get_download_link(mp3, "mp3"), unsafe_allow_html=True)
    if st.button("Delete All MP3"):
        for mp3 in mp3_files:
            try:
                os.remove(mp3)
            except Exception as e:
                st.error(f"Error deleting {mp3}: {e}")
        _rerun()
    st.markdown("### Saved PDF Files")
    pdf_files = glob.glob("*.pdf")
    for pdf in pdf_files:
        st.markdown(get_download_link(pdf, "pdf"), unsafe_allow_html=True)
# Build the PDF from the current markdown and sidebar settings.
with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold,
                           auto_bold_numbers, enlarge_numbered, num_columns, selected_font_name)

# create_pdf returns None (after reporting the error) when fonts cannot be
# registered; in that case there is nothing to preview or download.
if pdf_bytes:
    with st.container():
        pdf_images = pdf_to_image(pdf_bytes)
        if pdf_images:
            for img in pdf_images:
                st.image(img, use_container_width=True)
        else:
            st.info("Download the PDF to view it locally.")
    with st.sidebar:
        st.download_button(label="Download PDF", data=pdf_bytes, file_name="output.pdf", mime="application/pdf")