Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ import streamlit as st
|
|
9 |
import pandas as pd
|
10 |
from PIL import Image
|
11 |
from reportlab.pdfgen import canvas
|
12 |
-
from reportlab.lib.pagesizes import letter, A4
|
13 |
from reportlab.lib.utils import ImageReader
|
14 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as ReportLabImage
|
15 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
@@ -130,12 +130,28 @@ def markdown_to_pdf_content(markdown_text, add_space_before_numbered, headings_t
|
|
130 |
total_lines = len(pdf_content)
|
131 |
return pdf_content, total_lines
|
132 |
|
133 |
-
def create_pdf(markdown_texts, image_files, base_font_size=14, num_columns=2, add_space_before_numbered=True, headings_to_fonts=True, doc_title="Combined Document"):
|
134 |
if not markdown_texts and not image_files:
|
135 |
return None
|
136 |
buffer = io.BytesIO()
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
doc = SimpleDocTemplate(
|
140 |
buffer,
|
141 |
pagesize=(page_width, page_height),
|
@@ -255,8 +271,8 @@ def create_pdf(markdown_texts, image_files, base_font_size=14, num_columns=2, ad
|
|
255 |
try:
|
256 |
img = Image.open(img_path)
|
257 |
img_width, img_height = img.size
|
258 |
-
|
259 |
-
scale = min((
|
260 |
new_width = img_width * scale
|
261 |
new_height = img_height * scale
|
262 |
story.append(ReportLabImage(img_path, width=new_width, height=new_height))
|
@@ -312,36 +328,94 @@ tab1, tab2 = st.tabs(["๐ PDF Composer", "๐งช Code Interpreter"])
|
|
312 |
|
313 |
with tab1:
|
314 |
st.header("๐ PDF Composer & Voice Generator ๐")
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
else:
|
325 |
-
|
|
|
|
|
|
|
|
|
326 |
stem = datetime.now().strftime('%Y%m%d_%H%M%S')
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
# Voice settings
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
if st.button("๐ Generate & Download Voice MP3 from Text"):
|
340 |
if plain_text.strip():
|
341 |
voice_file = f"{stem}_{selected_voice}.mp3"
|
342 |
try:
|
343 |
-
|
344 |
-
|
|
|
|
|
345 |
st.audio(audio_file)
|
346 |
with open(audio_file, 'rb') as mp3:
|
347 |
st.download_button("๐ฅ Download MP3", data=mp3, file_name=voice_file, mime="audio/mpeg")
|
@@ -349,10 +423,13 @@ with tab1:
|
|
349 |
st.error(f"Error generating voice: {e}")
|
350 |
else:
|
351 |
st.warning("No text to generate voice from.")
|
|
|
352 |
# Image uploads and ordering
|
|
|
353 |
imgs = st.file_uploader("Upload Images for PDF", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
|
354 |
ordered_images = []
|
355 |
if imgs:
|
|
|
356 |
df_imgs = pd.DataFrame([{"name": f.name, "order": i} for i, f in enumerate(imgs)])
|
357 |
edited = st.data_editor(df_imgs, use_container_width=True, num_rows="dynamic")
|
358 |
for _, row in edited.sort_values("order").iterrows():
|
@@ -360,62 +437,111 @@ with tab1:
|
|
360 |
if f.name == row['name']:
|
361 |
ordered_images.append(f)
|
362 |
break
|
363 |
-
|
364 |
-
|
365 |
-
|
|
|
|
|
|
|
|
|
366 |
else:
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
c.
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
419 |
st.markdown("---")
|
420 |
st.subheader("๐ Available Assets")
|
421 |
all_assets = glob.glob("*.*")
|
@@ -455,24 +581,25 @@ with tab1:
|
|
455 |
if not st.session_state.selected_assets:
|
456 |
st.warning("Please select at least one asset to generate a PDF.")
|
457 |
else:
|
458 |
-
|
459 |
image_files = []
|
460 |
for a in st.session_state.selected_assets:
|
461 |
ext = a.split('.')[-1].lower()
|
462 |
if ext == 'md':
|
463 |
with open(a, 'r', encoding='utf-8') as f:
|
464 |
-
|
465 |
elif ext in ['png', 'jpg', 'jpeg']:
|
466 |
image_files.append(a)
|
467 |
with st.spinner("Generating PDF from selected assets..."):
|
468 |
pdf_bytes = create_pdf(
|
469 |
-
markdown_texts=
|
470 |
image_files=image_files,
|
471 |
-
base_font_size=
|
472 |
-
num_columns=
|
473 |
add_space_before_numbered=True,
|
474 |
headings_to_fonts=True,
|
475 |
-
doc_title="Combined_Selected_Assets"
|
|
|
476 |
)
|
477 |
if pdf_bytes:
|
478 |
pdf_images = pdf_to_image(pdf_bytes)
|
@@ -489,6 +616,7 @@ with tab1:
|
|
489 |
)
|
490 |
else:
|
491 |
st.error("Failed to generate PDF.")
|
|
|
492 |
st.markdown("---")
|
493 |
st.subheader("๐ผ Image Gallery")
|
494 |
image_files = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
|
@@ -505,6 +633,7 @@ with tab1:
|
|
505 |
st.warning(f"Could not load image {image_file}: {e}")
|
506 |
else:
|
507 |
st.info("No images found in the current directory.")
|
|
|
508 |
st.markdown("---")
|
509 |
st.subheader("๐ฅ Video Gallery")
|
510 |
video_files = glob.glob("*.mp4")
|
|
|
9 |
import pandas as pd
|
10 |
from PIL import Image
|
11 |
from reportlab.pdfgen import canvas
|
12 |
+
from reportlab.lib.pagesizes import letter, A4, legal, A3, A5, LETTER, LEGAL
|
13 |
from reportlab.lib.utils import ImageReader
|
14 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as ReportLabImage
|
15 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
|
|
130 |
total_lines = len(pdf_content)
|
131 |
return pdf_content, total_lines
|
132 |
|
133 |
+
def create_pdf(markdown_texts, image_files, base_font_size=14, num_columns=2, add_space_before_numbered=True, headings_to_fonts=True, doc_title="Combined Document", page_size=A4):
|
134 |
if not markdown_texts and not image_files:
|
135 |
return None
|
136 |
buffer = io.BytesIO()
|
137 |
+
|
138 |
+
# Resolve the requested page size to ReportLab (width, height) dimensions.
# `page_size` may be one of the names offered by the UI ("A4", "Letter", ...)
# or an explicit (width, height) pair such as this function's own `A4`
# default. The previous string-only chain silently turned every tuple
# argument (e.g. `letter`) into A4.
named_page_sizes = {
    "A4": A4,
    "Letter": letter,
    "Legal": legal,
    "A3": A3,
    "A5": A5,
}
if isinstance(page_size, str):
    # Unknown names keep the historical A4 fallback.
    page_dimensions = named_page_sizes.get(page_size, A4)
elif isinstance(page_size, (tuple, list)) and len(page_size) == 2:
    page_dimensions = tuple(page_size)
else:
    page_dimensions = A4  # Default fallback

# The document is laid out on a page twice the nominal width; the nominal
# `page_dimensions` stay available for code that scales against them.
page_width = page_dimensions[0] * 2
page_height = page_dimensions[1]
|
154 |
+
|
155 |
doc = SimpleDocTemplate(
|
156 |
buffer,
|
157 |
pagesize=(page_width, page_height),
|
|
|
271 |
try:
|
272 |
img = Image.open(img_path)
|
273 |
img_width, img_height = img.size
|
274 |
+
page_width_img, page_height_img = page_dimensions
|
275 |
+
scale = min((page_width_img - 40) / img_width, (page_height_img - 40) / img_height)
|
276 |
new_width = img_width * scale
|
277 |
new_height = img_height * scale
|
278 |
story.append(ReportLabImage(img_path, width=new_width, height=new_height))
|
|
|
328 |
|
329 |
with tab1:
|
330 |
st.header("๐ PDF Composer & Voice Generator ๐")
|
331 |
+
|
332 |
+
# Layout controls for the generated PDF are collected in the sidebar:
# column count, font face and base font size.
with st.sidebar:
    st.subheader("๐ PDF Settings")
    columns = st.slider("Text columns", min_value=1, max_value=3, value=2)
    font_family = st.selectbox(
        "Font",
        options=["Helvetica", "Times-Roman", "Courier", "DejaVuSans"],
    )
    font_size = st.slider("Font size", min_value=6, max_value=24, value=14)
|
338 |
+
|
339 |
+
# Page size selection.
# Maps the label shown in the select box to the short page-size name that is
# passed on to the PDF code. The multiplication sign in the labels was
# mis-encoded and is restored here; the text before " (" is unchanged.
# NOTE(review): the leading glyph of the select-box label below also looks
# mis-encoded, but the intended emoji cannot be determined from this file.
page_size_options = {
    "A4 (210 × 297 mm)": "A4",
    "Letter (8.5 × 11 in)": "Letter",
    "Legal (8.5 × 14 in)": "Legal",
    "A3 (297 × 420 mm)": "A3",
    "A5 (148 × 210 mm)": "A5",
}
selected_page_size = st.selectbox(
    "๐ Page Size",
    options=list(page_size_options),
    index=0,  # Default to A4 (first entry)
)
page_size = page_size_options[selected_page_size]
|
353 |
+
|
354 |
+
# Markdown input: either several uploaded .md files or one manual text area.
# Produces:
#   markdown_texts - list with one markdown string per source
#   combined_text  - all sources concatenated
#   stem           - timestamped base name for generated files
md_files = st.file_uploader("Upload Markdown Files (.md)", type=["md"], accept_multiple_files=True)
markdown_texts = []
combined_text = ""

if md_files:
    st.subheader(f"๐ Uploaded Files ({len(md_files)})")
    for md_file in md_files:
        # errors="replace": an upload that is not valid UTF-8 shows U+FFFD
        # markers instead of aborting the whole script run with
        # UnicodeDecodeError.
        md_text = md_file.getvalue().decode("utf-8", errors="replace")
        markdown_texts.append(md_text)

        with st.expander(f"๐ {md_file.name}"):
            # Only the first 500 characters are previewed.
            preview = md_text[:500] + "..." if len(md_text) > 500 else md_text
            st.markdown(preview)

    # Same result as appending "text + blank line" per file, built in one pass.
    combined_text = "".join(text + "\n\n" for text in markdown_texts)
    stem = f"combined_{len(md_files)}_files_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
else:
    # Single text area for manual input
    manual_text = st.text_area("Or enter markdown text directly", height=200)
    if manual_text:
        markdown_texts = [manual_text]
        combined_text = manual_text
    # `stem` is set even without text: it names image-only PDFs as well.
    stem = datetime.now().strftime('%Y%m%d_%H%M%S')
|
377 |
+
|
378 |
+
# Convert Markdown to plain text for voice generation and show a short
# summary of the collected content. `plain_text` is always defined afterwards
# (empty string when there is no input).
if combined_text:
    # Imported as a function under an alias because this script binds the
    # name `html` to the rendered markup below.
    from html import unescape as unescape_entities

    renderer = mistune.HTMLRenderer()
    markdown = mistune.create_markdown(renderer=renderer)
    html = markdown(combined_text)
    # Remove the tags first, then decode character references. Without the
    # decode step, entity-encoded text such as "&amp;" or "&lt;" would be
    # carried verbatim into the speech and PDF text.
    plain_text = unescape_entities(re.sub(r'<[^>]+>', '', html))

    st.subheader("๐ Content Summary")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        # Manual input counts as a single "file".
        st.metric("Files", len(md_files) if md_files else 1)
    with col2:
        st.metric("Total Characters", len(combined_text))
    with col3:
        st.metric("Estimated Words", len(combined_text.split()))
    with col4:
        # Label text up to the " (" that introduces the dimensions.
        st.metric("Page Size", selected_page_size.split(" (")[0])
else:
    plain_text = ""
|
397 |
+
|
398 |
# Text-to-speech options: a language/speed pair in the left column and the
# neural voice used for MP3 generation in the right column.
languages = {"English (US)": "en", "English (UK)": "en-uk", "Spanish": "es"}
VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]

st.subheader("๐ Text-to-Speech Settings")
col1, col2 = st.columns(2)

with col1:
    voice_choice = st.selectbox("Voice Language", list(languages))
    # Language code that belongs to the chosen display name.
    voice_lang = languages[voice_choice]
    slow = st.checkbox("Slow Speech")

with col2:
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
|
410 |
+
|
411 |
if st.button("๐ Generate & Download Voice MP3 from Text"):
|
412 |
if plain_text.strip():
|
413 |
voice_file = f"{stem}_{selected_voice}.mp3"
|
414 |
try:
|
415 |
+
with st.spinner("Generating audio..."):
|
416 |
+
cleaned_text = clean_for_speech(plain_text)
|
417 |
+
audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, voice_file))
|
418 |
+
st.success("Audio generated successfully!")
|
419 |
st.audio(audio_file)
|
420 |
with open(audio_file, 'rb') as mp3:
|
421 |
st.download_button("๐ฅ Download MP3", data=mp3, file_name=voice_file, mime="audio/mpeg")
|
|
|
423 |
st.error(f"Error generating voice: {e}")
|
424 |
else:
|
425 |
st.warning("No text to generate voice from.")
|
426 |
+
|
427 |
# Image uploads and ordering
|
428 |
+
st.subheader("๐ผ๏ธ Image Management")
|
429 |
imgs = st.file_uploader("Upload Images for PDF", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
|
430 |
ordered_images = []
|
431 |
if imgs:
|
432 |
+
st.write(f"๐ Uploaded {len(imgs)} images")
|
433 |
df_imgs = pd.DataFrame([{"name": f.name, "order": i} for i, f in enumerate(imgs)])
|
434 |
edited = st.data_editor(df_imgs, use_container_width=True, num_rows="dynamic")
|
435 |
for _, row in edited.sort_values("order").iterrows():
|
|
|
437 |
if f.name == row['name']:
|
438 |
ordered_images.append(f)
|
439 |
break
|
440 |
+
|
441 |
+
# PDF Generation
# Two paths:
#   * markdown only   -> create_pdf(), followed by an image preview of the pages
#   * anything else   -> a plain canvas: the text flows through `columns`
#                        columns, then every image gets a page of its own
#                        pixel size.
st.subheader("๐ PDF Generation")

if st.button("๐๏ธ Generate PDF with Markdown & Images", type="primary"):
    if not markdown_texts and not ordered_images:
        st.warning("Please provide some markdown text or upload images to generate a PDF.")
    else:
        with st.spinner(f"Generating PDF with {page_size} page size..."):
            if markdown_texts and not ordered_images:
                # Use the enhanced create_pdf function
                pdf_bytes = create_pdf(
                    markdown_texts=markdown_texts,
                    image_files=[],
                    base_font_size=font_size,
                    num_columns=columns,
                    add_space_before_numbered=True,
                    headings_to_fonts=True,
                    doc_title=f"Markdown_Document_{len(markdown_texts)}_files",
                    page_size=page_size,
                )

                if pdf_bytes:
                    pdf_images = pdf_to_image(pdf_bytes)
                    if pdf_images:
                        st.subheader("Preview of Generated PDF")
                        for i, img in enumerate(pdf_images):
                            st.image(img, caption=f"Page {i+1}", use_container_width=True)

                    pdf_name = f"{stem}.pdf"
                    st.download_button("โฌ๏ธ Download PDF", data=pdf_bytes, file_name=pdf_name, mime="application/pdf")
                else:
                    st.error("Failed to generate PDF from markdown.")
            else:
                # Fallback to original simple PDF generation for mixed content
                buf = io.BytesIO()

                # Page dimensions for the selected page size (A4 when unknown).
                page_dimensions = {
                    "A4": A4,
                    "Letter": letter,
                    "Legal": legal,
                    "A3": A3,
                    "A5": A5,
                }.get(page_size, A4)

                c = canvas.Canvas(buf, pagesize=page_dimensions)
                # True once something was drawn on the page currently open.
                # Used so that an image only starts a new page when the
                # current one is in use; an unconditional showPage() put a
                # blank page in front of image-only documents.
                page_has_content = False

                if plain_text.strip():
                    page_w, page_h = page_dimensions
                    margin = 40
                    gutter = 20
                    col_w = (page_w - 2*margin - (columns-1)*gutter) / columns
                    c.setFont(font_family, font_size)
                    line_height = font_size * 1.2
                    col = 0
                    x = margin
                    y = page_h - margin
                    # Rough estimate of how many characters fit in a column.
                    avg_char_width = font_size * 0.6
                    wrap_width = int(col_w / avg_char_width) if avg_char_width > 0 else 100
                    # textwrap.wrap() rejects a width of 0.
                    wrap_width = max(1, wrap_width)
                    for paragraph in plain_text.split("\n"):
                        if not paragraph.strip():
                            # Blank line: only advance the cursor.
                            y -= line_height
                            if y < margin:
                                col += 1
                                if col >= columns:
                                    c.showPage()
                                    page_has_content = False
                                    # The font is set again after every page break.
                                    c.setFont(font_family, font_size)
                                    col = 0
                                x = margin + col*(col_w+gutter)
                                y = page_h - margin
                            continue
                        for line in textwrap.wrap(paragraph, wrap_width):
                            if y < margin:
                                # Column is full: next column, or next page
                                # after the last column.
                                col += 1
                                if col >= columns:
                                    c.showPage()
                                    page_has_content = False
                                    c.setFont(font_family, font_size)
                                    col = 0
                                x = margin + col*(col_w+gutter)
                                y = page_h - margin
                            c.drawString(x, y, line)
                            page_has_content = True
                            y -= line_height
                        # Extra line of space between paragraphs.
                        y -= line_height

                for img_f in ordered_images:
                    try:
                        img = Image.open(img_f)
                        w, h = img.size
                        if page_has_content:
                            c.showPage()
                            page_has_content = False
                        # One page per image, sized to the image's pixel dimensions.
                        c.setPageSize((w, h))
                        c.drawImage(ImageReader(img), 0, 0, w, h, preserveAspectRatio=False)
                        page_has_content = True
                    except Exception as e:
                        # Best effort: report the image and carry on with the rest.
                        st.warning(f"Could not process image {img_f.name}: {e}")
                        continue

                c.save()
                buf.seek(0)
                pdf_name = f"{stem}.pdf"
                st.success(f"PDF generated successfully with {page_size} page size!")
                st.download_button("โฌ๏ธ Download PDF", data=buf, file_name=pdf_name, mime="application/pdf")
|
544 |
+
|
545 |
st.markdown("---")
|
546 |
st.subheader("๐ Available Assets")
|
547 |
all_assets = glob.glob("*.*")
|
|
|
581 |
if not st.session_state.selected_assets:
|
582 |
st.warning("Please select at least one asset to generate a PDF.")
|
583 |
else:
|
584 |
+
selected_markdown_texts = []
|
585 |
image_files = []
|
586 |
for a in st.session_state.selected_assets:
|
587 |
ext = a.split('.')[-1].lower()
|
588 |
if ext == 'md':
|
589 |
with open(a, 'r', encoding='utf-8') as f:
|
590 |
+
selected_markdown_texts.append(f.read())
|
591 |
elif ext in ['png', 'jpg', 'jpeg']:
|
592 |
image_files.append(a)
|
593 |
with st.spinner("Generating PDF from selected assets..."):
|
594 |
pdf_bytes = create_pdf(
|
595 |
+
markdown_texts=selected_markdown_texts,
|
596 |
image_files=image_files,
|
597 |
+
base_font_size=font_size,
|
598 |
+
num_columns=columns,
|
599 |
add_space_before_numbered=True,
|
600 |
headings_to_fonts=True,
|
601 |
+
doc_title="Combined_Selected_Assets",
|
602 |
+
page_size=page_size
|
603 |
)
|
604 |
if pdf_bytes:
|
605 |
pdf_images = pdf_to_image(pdf_bytes)
|
|
|
616 |
)
|
617 |
else:
|
618 |
st.error("Failed to generate PDF.")
|
619 |
+
|
620 |
st.markdown("---")
|
621 |
st.subheader("๐ผ Image Gallery")
|
622 |
image_files = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
|
|
|
633 |
st.warning(f"Could not load image {image_file}: {e}")
|
634 |
else:
|
635 |
st.info("No images found in the current directory.")
|
636 |
+
|
637 |
st.markdown("---")
|
638 |
st.subheader("๐ฅ Video Gallery")
|
639 |
video_files = glob.glob("*.mp4")
|