# Source page header (Hugging Face file viewer): awacke1 - "Create app.py",
# commit 5978ca5 (verified), 13.6 kB. The Python module starts below.
import io
import os
import re
import glob
import textwrap
from datetime import datetime
from pathlib import Path
from contextlib import redirect_stdout
import streamlit as st
import pandas as pd
from PIL import Image
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader
import mistune
from gtts import gTTS
# --- Core Utility Functions ---
# 🪄 Now you see it, now you don't.
def delete_asset(path: str):
    """Delete the file at ``path`` if it is present, then request a rerun.

    Args:
        path: Filesystem path of the asset to remove.
    """
    # Try the removal directly rather than testing os.path.exists() first:
    # the file can disappear between the check and the removal.
    try:
        os.remove(path)
    except FileNotFoundError:
        pass  # Already gone, which is the state the caller asked for.
    # NOTE(review): this function is wired as a button ``on_click`` callback
    # in display_local_assets; Streamlit documents st.rerun() inside a
    # callback as a no-op - confirm whether this call is still needed.
    st.rerun()
# 🕵️‍♂️ On the hunt for related files in the digital jungle.
def get_project_files(pattern: str = "*_*.*") -> dict:
    """Group Markdown and image files into projects keyed by name prefix.

    The project name is the file stem with its last ``_``-separated segment
    removed (``Report_2024.md`` -> ``Report``).

    Args:
        pattern: Glob pattern selecting candidate files.

    Returns:
        A dict mapping project name to ``{'md': [...], 'images': [...]}``,
        where each list holds the matching paths as returned by ``glob``.
        Files that are neither ``.md`` nor ``.png``/``.jpg``/``.jpeg`` are
        ignored and never create a project entry.
    """
    image_exts = {'.png', '.jpg', '.jpeg'}
    projects = {}
    # glob order depends on the filesystem; sort so listings (and the
    # "first markdown file" picked by callers) are deterministic.
    for f in sorted(glob.glob(pattern)):
        path = Path(f)
        ext = path.suffix.lower()
        if ext == '.md':
            kind = 'md'
        elif ext in image_exts:
            kind = 'images'
        else:
            # Skip unrelated files *before* registering a project, so that
            # e.g. generated "<date>_<time>.mp3" outputs matching the
            # pattern do not appear as empty projects.
            continue
        project_name = '_'.join(path.stem.split('_')[:-1])
        projects.setdefault(project_name, {'md': [], 'images': []})[kind].append(f)
    return projects
# ✨ Turning markdown into its simple, unadorned soul.
def md_to_plain_text(md_text: str) -> str:
    """Convert Markdown to plain text by rendering to HTML and stripping tags.

    Args:
        md_text: Markdown source; falsy values (``""``/``None``) yield ``""``.

    Returns:
        The text content with tags removed, HTML entities decoded, and
        leading/trailing whitespace stripped.
    """
    # Imported locally because the stdlib module name ``html`` is not
    # imported at file level.
    from html import unescape

    if not md_text:
        return ""
    rendered = mistune.html(md_text)
    without_tags = re.sub(r'<[^>]+>', '', rendered)
    # The renderer escapes characters such as & < > " in text; decode them
    # *after* tag stripping so literal "&lt;b&gt;" is not mistaken for a tag.
    return unescape(without_tags).strip()
# --- PDF & Audio Generation ---
# 🎙️ Lending a golden voice to your silent words.
def generate_audio(text: str, filename_stem: str, lang: str, slow: bool) -> str | None:
    """Synthesize ``text`` to ``<filename_stem>.mp3`` using gTTS.

    Args:
        text: Text to speak; when empty a warning is shown and nothing is written.
        filename_stem: Output file name without the ``.mp3`` extension.
        lang: Language code passed to gTTS.
        slow: Passed to gTTS as its ``slow`` flag.

    Returns:
        The path of the written MP3, or ``None`` when there was no text or
        synthesis failed (the failure is reported via ``st.error``).
    """
    if text:
        target = f"{filename_stem}.mp3"
        try:
            gTTS(text=text, lang=lang, slow=slow).save(target)
        except Exception as exc:
            st.error(f"Failed to generate audio: {exc}")
            return None
        return target

    st.warning("No text provided to generate audio.")
    return None
# ✍️ Weaving words into PDF poetry.
def render_pdf_text(c: canvas.Canvas, text: str, settings: dict):
    """Draw ``text`` onto the canvas in one or more columns on letter pages.

    Args:
        c: Canvas to draw on.
        text: Text to render; split into paragraphs on ``"\\n"``.
        settings: Dict providing ``'columns'``, ``'font_family'`` and
            ``'font_size'``.
    """
    page_w, page_h = letter
    margin, gutter = 40, 20
    n_cols = settings['columns']
    col_w = (page_w - 2 * margin - (n_cols - 1) * gutter) / n_cols
    size = settings['font_size']
    c.setFont(settings['font_family'], size)
    line_height = size * 1.2
    # Characters per line, estimated as column width / (0.6 * font size).
    wrap_width = int(col_w / (size * 0.6))

    def advance(current_col):
        # Move to the next column, or to a fresh page after the last one.
        return new_pdf_page_or_column(c, settings, current_col, margin, col_w, gutter, page_h)

    col, x, y = 0, margin, page_h - margin
    for paragraph in text.split("\n"):
        pieces = textwrap.wrap(paragraph, wrap_width, replace_whitespace=False, drop_whitespace=False)
        if not pieces:
            # Empty paragraph: leave one blank line of vertical space.
            y -= line_height
            if y < margin:
                col, x, y = advance(col)
            continue
        for piece in pieces:
            if y < margin:
                col, x, y = advance(col)
            c.drawString(x, y, piece)
            y -= line_height
# 📄 Time to turn the page, or at least scoot over.
def new_pdf_page_or_column(c, settings, col, margin, col_w, gutter, page_h):
    """Advance the text cursor to the top of the next column or page.

    Args:
        c: Canvas; only used when a new page has to be started.
        settings: Dict providing ``'columns'``, ``'font_family'``, ``'font_size'``.
        col: Index of the column that has just been filled.
        margin: Page margin.
        col_w: Width of one column.
        gutter: Horizontal gap between columns.
        page_h: Page height.

    Returns:
        ``(col, x, y)`` for the new writing position.
    """
    next_col = col + 1
    if next_col >= settings['columns']:
        # Past the last column: start a new page and restore the font there.
        c.showPage()
        c.setFont(settings['font_family'], settings['font_size'])
        next_col = 0
    return next_col, margin + next_col * (col_w + gutter), page_h - margin
# 🖼️ Arranging your pixels perfectly on the page.
def render_pdf_images(c: canvas.Canvas, image_files: list):
    """Append each image to the PDF on its own page sized to the image.

    Args:
        c: Canvas to draw on.
        image_files: Image sources accepted by ``PIL.Image.open`` (paths or
            file-like objects). Images that fail to load or draw are skipped
            with a Streamlit warning.
    """
    for img_file in image_files:
        try:
            # Context manager releases any file handle PIL opened itself.
            with Image.open(img_file) as img:
                w, h = img.size
                # NOTE(review): showPage() runs before every image, including
                # the first; if nothing was drawn earlier this presumably
                # leaves an empty leading page - confirm against reportlab.
                c.showPage()
                c.setPageSize((w, h))
                c.drawImage(ImageReader(img), 0, 0, w, h, preserveAspectRatio=True, mask='auto')
        except Exception as e:
            # Plain path strings have no ``.name``; fall back to the value
            # itself so the warning path cannot raise AttributeError.
            label = getattr(img_file, 'name', img_file)
            st.warning(f"Could not process image {label}: {e}")
# 📜 The grand finale: text and images join forces in a PDF.
def generate_pdf_from_content(text: str, images: list, settings: dict, filename_stem: str) -> bytes:
    """Build a PDF in memory from optional text followed by optional images.

    Args:
        text: Plain text to lay out first; skipped when empty.
        images: Image sources rendered after the text; skipped when empty.
        settings: Layout settings forwarded to ``render_pdf_text``.
        filename_stem: Accepted for interface compatibility; not used here.

    Returns:
        The serialized PDF document.
    """
    stream = io.BytesIO()
    pdf = canvas.Canvas(stream, pagesize=letter)

    if text:
        render_pdf_text(pdf, text, settings)
    if images:
        render_pdf_images(pdf, images)

    pdf.save()
    # getvalue() returns the whole buffer regardless of stream position.
    return stream.getvalue()
# --- Streamlit UI Components ---
# 🎛️ Organizing the mission control for your creative genius.
def setup_sidebar(projects: dict):
    """Render the sidebar (PDF style controls and project listing).

    Args:
        projects: Mapping of project name to ``{'md': [...], 'images': [...]}``.

    Returns:
        A dict with the chosen ``'columns'``, ``'font_family'`` and
        ``'font_size'`` values.
    """
    # Emoji in the labels below were mis-encoded (UTF-8 read as a legacy
    # code page); they are restored to the intended characters.
    st.sidebar.header("🎨 PDF Style Settings")
    settings = {
        'columns': st.sidebar.slider("Text columns", 1, 3, 1),
        'font_family': st.sidebar.selectbox("Font Family", ["Helvetica", "Times-Roman", "Courier"]),
        'font_size': st.sidebar.slider("Font Size", 6, 24, 12),
    }

    st.sidebar.header("📂 Project Files")
    st.sidebar.caption("Files matching the `Name_Date` pattern.")
    if not projects:
        st.sidebar.info("No projects found. Upload files with a `_` in the name.")
        return settings

    for name, files in sorted(projects.items()):
        with st.sidebar.expander(f"Project: {name}"):
            if files['md']:
                st.write("📄 " + ", ".join(Path(f).name for f in files['md']))
            if files['images']:
                st.write("🖼️ " + ", ".join(Path(f).name for f in files['images']))
    return settings
# 🏆 Putting your magnificent creations on display.
def display_local_assets():
    """List PDF and MP3 files in the working directory with per-file actions.

    PDFs get a download button, MP3s an inline audio player, and every
    asset a delete button that calls ``delete_asset`` with its path.
    """
    st.markdown("---")
    st.subheader("📂 Available Assets")
    assets = sorted(glob.glob("*.pdf") + glob.glob("*.mp3"))
    if not assets:
        st.info("No PDFs or MP3s generated yet.")
        return

    for asset_path in assets:
        # The file may be removed between the glob above and this read;
        # skip it instead of crashing the whole page.
        try:
            with open(asset_path, 'rb') as f:
                file_bytes = f.read()
        except OSError:
            continue

        ext = Path(asset_path).suffix.lower()
        cols = st.columns([4, 2, 1])
        cols[0].write(f"`{asset_path}`")
        if ext == '.pdf':
            # Explicit key (like the delete button) keeps the widget unique
            # even if another download button shares its label and file name.
            cols[1].download_button(
                "⬇️ Download PDF",
                data=file_bytes,
                file_name=asset_path,
                mime="application/pdf",
                key=f"dl_{asset_path}",
            )
        elif ext == '.mp3':
            cols[1].audio(file_bytes, format='audio/mpeg')
        cols[2].button("🗑️ Delete", key=f"del_{asset_path}", on_click=delete_asset, args=(asset_path,))
# 🎭 The main stage for our PDF and Voice show.
def _is_markdown_upload(uploaded) -> bool:
    """Return True when an uploaded file is Markdown, judged by its extension."""
    # Browsers do not reliably report "text/markdown" for .md files, so the
    # file name is used instead of the MIME type.
    return Path(uploaded.name).suffix.lower() == ".md"


def _image_data_uri(image_file) -> str:
    """Encode an in-memory image as a base64 data URL for table previews."""
    import base64  # local import: only needed for previews

    suffix = Path(image_file.name).suffix.lower()
    mime = "image/png" if suffix == ".png" else "image/jpeg"
    payload = base64.b64encode(image_file.getvalue()).decode("ascii")
    return f"data:{mime};base64,{payload}"


def _arrange_images(images: list) -> list:
    """Show an ordering table and return ``images`` sorted by its ``order`` column."""
    st.subheader("🖼️ Arrange Images")
    st.caption("Edit the `order` column to reorder the images for the PDF.")
    df_imgs = pd.DataFrame([
        {"order": i + 1, "name": f.name, "preview": _image_data_uri(f)}
        for i, f in enumerate(images)
    ])
    edited_df = st.data_editor(
        df_imgs,
        column_config={"preview": st.column_config.ImageColumn("Preview")},
        disabled=["name", "preview"],
        hide_index=True,
        use_container_width=True,
        # Fixed rows keep table row i aligned with images[i].
        num_rows="fixed",
    )
    # Cleared or non-numeric cells sort last; ties keep their original order.
    ranks = pd.to_numeric(edited_df["order"], errors="coerce").fillna(float("inf")).tolist()
    return [images[idx] for _, idx in sorted(zip(ranks, range(len(images))))]


def pdf_composer_tab(projects: dict):
    """Render the PDF composer / voice generator tab.

    Collects Markdown text and images either from an existing project or
    from uploads/pasted text, lets the user order the images, and offers
    PDF and MP3 generation plus the list of local assets.

    Args:
        projects: Mapping of project name to ``{'md': [...], 'images': [...]}``.
    """
    st.header("📄 PDF Composer & Voice Generator 🚀")
    # NOTE(review): only the radio is placed in col1; col2 is never used in
    # this function.
    col1, col2 = st.columns(2)
    with col1:
        input_method = st.radio(
            "Choose your content source:",
            ["Select an existing project", "Upload new files or paste text"],
            horizontal=True,
            label_visibility="collapsed"
        )

    md_text = ""
    selected_images = []
    filename_stem = datetime.now().strftime('%Y%m%d_%H%M%S')

    if input_method == "Select an existing project" and projects:
        sorted_project_names = sorted(projects.keys())
        chosen_project = st.selectbox("Select Project", sorted_project_names)
        md_files = projects[chosen_project]['md']
        if md_files:
            md_path = md_files[0]
            filename_stem = Path(md_path).stem
            with open(md_path, 'r', encoding='utf-8') as f:
                md_text = f.read()
            # Use the widget's return value so edits made in the text area
            # are what gets rendered; the per-project key keeps one
            # project's edits from leaking into another.
            md_text = st.text_area(
                "Markdown Content",
                value=md_text,
                height=250,
                key=f"md_from_project_{chosen_project}",
            )
        image_files = projects[chosen_project]['images']
        if image_files:
            st.info(f"Found {len(image_files)} related image(s):")
            for img in image_files:
                st.image(img, width=150, caption=Path(img).name)
                with open(img, 'rb') as f:
                    bytes_io = io.BytesIO(f.read())
                # Give the buffer a name so it can be listed like an upload.
                bytes_io.name = Path(img).name
                selected_images.append(bytes_io)
    else:
        st.info("Upload a Markdown file, or just paste your text below.")
        uploaded_files = st.file_uploader(
            "Upload Markdown (.md) and Image files (.png, .jpg)",
            type=["md", "png", "jpg", "jpeg"],
            accept_multiple_files=True
        ) or []
        md_from_upload = [f for f in uploaded_files if _is_markdown_upload(f)]
        images_from_upload = [f for f in uploaded_files if not _is_markdown_upload(f)]
        if md_from_upload:
            md_file = md_from_upload[0]
            # Undecodable bytes are replaced rather than aborting the page.
            md_text = md_file.getvalue().decode("utf-8", errors="replace")
            filename_stem = Path(md_file.name).stem
        md_text = st.text_area("Markdown Text", value=md_text, height=250, key="md_from_paste")
        selected_images.extend(images_from_upload)

    if selected_images:
        selected_images = _arrange_images(selected_images)

    plain_text = md_to_plain_text(md_text)
    st.markdown("---")
    st.subheader("🎬 Generate Your Files")
    pdf_settings = setup_sidebar(projects)
    pdf_col, voice_col = st.columns(2)

    with pdf_col:
        if st.button("🖋️ Generate PDF", use_container_width=True, type="primary"):
            if not plain_text and not selected_images:
                st.error("Cannot generate an empty PDF. Please add some text or images.")
            else:
                with st.spinner("Crafting your PDF..."):
                    pdf_bytes = generate_pdf_from_content(plain_text, selected_images, pdf_settings, filename_stem)
                st.download_button(
                    label="⬇️ Download PDF",
                    data=pdf_bytes,
                    file_name=f"{filename_stem}.pdf",
                    mime="application/pdf",
                    use_container_width=True
                )
                st.success("PDF is ready for download!")

    with voice_col:
        st.markdown("<h6>Voice Generation</h6>", unsafe_allow_html=True)
        languages = {"English (US)": "en", "English (UK)": "en-gb", "Spanish": "es"}
        voice_choice = st.selectbox("Voice Language", list(languages.keys()))
        slow_speech = st.checkbox("Slow Speech")
        if st.button("🔊 Generate MP3", use_container_width=True):
            with st.spinner("Converting text to speech..."):
                audio_file = generate_audio(plain_text, filename_stem, languages[voice_choice], slow_speech)
            if audio_file:
                st.success("MP3 generated!")
                # Read the bytes up front so no open handle is passed around.
                with open(audio_file, 'rb') as mp3:
                    mp3_bytes = mp3.read()
                st.download_button("📥 Download MP3", data=mp3_bytes, file_name=audio_file, mime="audio/mpeg", use_container_width=True)
                st.audio(audio_file)

    display_local_assets()
# --- Code Interpreter Section ---
# 👻 Catching code spirits in a bottle (or a buffer).
def execute_code(code: str) -> tuple[str | None, str | None]:
buf = io.StringIO()
try:
with redirect_stdout(buf):
exec(code, {})
return buf.getvalue(), None
except Exception as e:
return None, str(e)
# 🐍 Finding the sneaky Python hidden in the markdown grass.
def extract_python_code(md: str) -> list[str]:
    """Return the bodies of all ```` ```python ```` fenced blocks in ``md``.

    Args:
        md: Markdown text to scan.

    Returns:
        One string per fenced block, in document order; whitespace directly
        after the opening fence is dropped. Empty list when none are found.
    """
    fence = re.compile(r"```python\s*(.*?)```", re.DOTALL)
    return [match.group(1) for match in fence.finditer(md)]
# 🧪 A lab for your wild Python experiments (code runs unsandboxed in the app process).
def code_interpreter_tab():
    """Render the code-interpreter tab: upload, editor, run and clear.

    The editor widget (session key ``"code_editor"``) is the single source
    of truth for the code; ``st.session_state.code`` mirrors it.
    """
    st.header("🐍 Python Code Executor")
    st.info("Execute Python code snippets. Note: This runs within the Streamlit environment.")
    DEFAULT_CODE = 'import streamlit as st\n\nst.balloons()\nst.write("Hello from the code interpreter!")'
    editor_key = "code_editor"
    upload_key = "code_upload_token"

    # Seed the editor state before the widget is created.
    if editor_key not in st.session_state:
        st.session_state[editor_key] = st.session_state.get('code', DEFAULT_CODE)

    uploaded_file = st.file_uploader("Upload .py or .md file with Python code", type=['py', 'md'])
    if uploaded_file:
        # The uploader keeps returning the same file on every rerun; load
        # it only once so later edits and "Clear" are not overwritten.
        token = (uploaded_file.name, uploaded_file.size)
        if st.session_state.get(upload_key) != token:
            text = uploaded_file.getvalue().decode("utf-8", errors="replace")
            # Decide by extension: browsers do not reliably report
            # "text/markdown" for .md files.
            if Path(uploaded_file.name).suffix.lower() == '.md':
                codes = extract_python_code(text)
                text = codes[0] if codes else ''
            st.session_state[editor_key] = text
            st.session_state[upload_key] = token
    else:
        # Forget the last upload so the same file can be loaded again.
        st.session_state.pop(upload_key, None)

    st.session_state.code = st.text_area("Code Editor", height=300, key=editor_key)

    def _clear_editor():
        # Runs as a callback, i.e. before the widget is re-created, which is
        # when Streamlit allows assigning to a widget's session key.
        st.session_state[editor_key] = ''
        st.session_state.code = ''

    run_col, clear_col = st.columns(2)
    if run_col.button("▶️ Run Code", use_container_width=True, type="primary"):
        output, error = execute_code(st.session_state.code)
        # Compare with None: an error description may be an empty string.
        if error is not None:
            st.error(f"Execution Failed:\n\n{error}")
        elif output:
            st.subheader("Output")
            st.code(output, language=None)
        else:
            st.success("✅ Executed successfully with no output.")
    clear_col.button("🗑️ Clear Code", use_container_width=True, on_click=_clear_editor)
# --- Main App ---
# 🎬 Lights, camera, action! Kicking off the whole show.
def main():
    """Configure the page and render the two application tabs."""
    # The icon and tab emoji were mis-encoded (UTF-8 read as a legacy code
    # page); they are restored to the intended characters.
    st.set_page_config(page_title="PDF & Code Interpreter", layout="wide", page_icon="🚀")
    project_files = get_project_files()
    tab1, tab2 = st.tabs(["📄 PDF Composer", "🧪 Code Interpreter"])
    with tab1:
        pdf_composer_tab(project_files)
    with tab2:
        code_interpreter_tab()
# Run the app only when this file is executed as the main module.
if __name__ == "__main__":
    main()