awacke1 committed on
Commit
64c8743
·
verified ·
1 Parent(s): 58a836d

Delete backup7.app.py

Browse files
Files changed (1) hide show
  1. backup7.app.py +0 -427
backup7.app.py DELETED
@@ -1,427 +0,0 @@
1
- import io
2
- import re
3
- import os
4
- import glob
5
- import asyncio
6
- import hashlib
7
- import unicodedata
8
- import streamlit as st
9
- from PIL import Image
10
- import fitz
11
- import edge_tts
12
- from reportlab.lib.pagesizes import A4
13
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
14
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
- from reportlab.lib import colors
16
- from reportlab.pdfbase import pdfmetrics
17
- from reportlab.pdfbase.ttfonts import TTFont
18
- from datetime import datetime
19
- import pytz
20
-
21
# Use the full browser width and start with the sidebar collapsed.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
)
22
-
23
- # 🕒 Time flies when you're having function calls
24
def get_timestamp_prefix():
    """🕰️ Build the filename prefix from the current US/Central time.

    Returns:
        An upper-cased string made of the abbreviated weekday, MMDD, and
        the 12-hour HHMM followed by AM/PM.
    """
    central_now = datetime.now(pytz.timezone("US/Central"))
    stamp = central_now.strftime("%a %m%d %I%M%p")
    return stamp.upper()
30
-
31
- # 🧹 Because text needs a bath before being spoken
32
def clean_for_speech(text):
    """🧼 Strip characters that should not be read aloud by the TTS engine.

    Removes every ``#`` and every emoji in the covered Unicode ranges.
    Variation selectors (U+FE0E / U+FE0F) and zero-width joiners (U+200D)
    that trail an emoji are removed together with it, so sequences such as
    a heart followed by U+FE0F, or a ZWJ family emoji, leave no invisible
    residue in the text.

    Args:
        text: The raw markdown text.

    Returns:
        The cleaned text. Whitespace is left untouched.
    """
    # Remove hash marks
    text = text.replace("#", "")

    # One emoji code point plus any selectors/joiners glued to it; the outer
    # group repeats so that whole ZWJ sequences are consumed in one match.
    emoji_pattern = re.compile(
        r"(?:["
        r"\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF"
        r"][\uFE0E\uFE0F\u200D]*)+"
    )
    return emoji_pattern.sub("", text)
51
-
52
- # 🎤 Making robots talk so you don't have to
53
async def generate_audio(text, voice, filename):
    """🔊 Synthesize *text* with edge-tts and save the audio to *filename*.

    Args:
        text: The text to speak.
        voice: The edge-tts voice name passed to ``edge_tts.Communicate``.
        filename: Path the audio is saved to.

    Returns:
        ``filename``, so the caller can pass the result straight on.
    """
    await edge_tts.Communicate(text, voice).save(filename)
    return filename
58
-
59
- # 🔗 Detecting links like a digital bloodhound
60
def detect_and_convert_links(text):
    """🕸️ Turn markdown links and bare URLs in *text* into ``<a href>`` tags.

    Markdown links ``[label](http...)`` are converted first. The text is
    then scanned once for either an existing anchor element or a bare URL
    (``http://``, ``https://`` or ``www.`` followed by something containing
    a dot). Existing anchors are passed through untouched as a whole, so a
    URL that appears in an ``href`` or as the visible label of a link is
    never wrapped a second time.

    Sentence punctuation directly after a bare URL (``. , ; : ! ? '``) is
    kept outside the generated link. URLs starting with ``www.`` (in any
    letter case) get an ``http://`` prefix in the ``href`` only.

    Args:
        text: A line of markdown / lightly marked-up text.

    Returns:
        The text with links converted to anchor tags.
    """
    # Characters that may appear inside a bare URL. The double quote is
    # excluded because the URL is placed inside href="...".
    url_char = r'[^\s\[\]()<>{}"]'

    md_link_pattern = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')

    # Alternation order matters: an anchor is consumed in full before the
    # URL branch can see anything inside it. An opening tag without a
    # closing tag is consumed on its own so its href is still protected.
    token_pattern = re.compile(
        r'(?P<anchor><a\b[^>]*>(?:.*?</a>)?)'
        r'|(?P<url>(?:https?://|www\.)' + url_char + r'+\.' + url_char + r'+)',
        re.IGNORECASE | re.DOTALL,
    )

    # First convert markdown links
    text = md_link_pattern.sub(r'<a href="\2">\1</a>', text)

    def linkify(match):
        if match.group('anchor') is not None:
            # Already a link: leave it exactly as it is.
            return match.group(0)

        raw = match.group('url')
        url = raw.rstrip(".,;:!?'")
        trailing = raw[len(url):]
        # The regex is case-insensitive, so the prefix test must be too.
        href = 'http://' + url if url.lower().startswith('www.') else url
        return f'<a href="{href}">{url}</a>{trailing}'

    return token_pattern.sub(linkify, text)
113
-
114
- # 🎭 Making emojis wear the right font costume
115
def apply_emoji_font(text, emoji_font):
    """🦄 Wrap emoji runs in *emoji_font* and all other text in DejaVuSans.

    Processing steps:

    1. ``<a href="...">...</a>`` links are swapped for ``###LINK_n###``
       placeholders so their markup is not split by font tags.
    2. ``<b>...</b>`` is swapped for ``###BOLD_START###`` / ``###BOLD_END###``.
    3. The text is cut into emoji and non-emoji runs, each wrapped in a
       ``<font face="...">`` tag. Variation selectors (U+FE0E / U+FE0F) and
       zero-width joiners (U+200D) that follow an emoji stay inside the
       emoji run instead of being rendered in the text font.
    4. Bold markers and links are restored, closing and reopening the
       surrounding font tag around them.

    Args:
        text: Text that may contain ``<b>`` and ``<a href>`` markup.
        emoji_font: Font face name used for emoji runs.

    Returns:
        The text with ``<font>`` markup applied.
    """
    base_open = '<font face="DejaVuSans">'

    # First handle links - temporarily replace them with placeholders
    link_pattern = re.compile(r'<a\s+href="([^"]+)">(.*?)</a>')
    links = []

    def stash_link(match):
        links.append((match.group(1), match.group(2)))
        return f"###LINK_{len(links) - 1}###"

    text = link_pattern.sub(stash_link, text)

    # Now handle bold formatting
    text = re.sub(r'<b>(.*?)</b>', lambda m: f'###BOLD_START###{m.group(1)}###BOLD_END###', text)

    # An emoji code point plus any selectors/joiners attached to it,
    # repeated so that a whole emoji sequence forms a single run.
    emoji_pattern = re.compile(
        r"((?:["
        r"\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF"
        r"][\uFE0E\uFE0F\u200D]*)+)"
    )

    segments = []
    last_pos = 0
    for match in emoji_pattern.finditer(text):
        start, end = match.span()
        if last_pos < start:
            segments.append(f'{base_open}{text[last_pos:start]}</font>')
        emoji = unicodedata.normalize('NFC', match.group(1))
        segments.append(f'<font face="{emoji_font}">{emoji}</font>')
        last_pos = end

    if last_pos < len(text):
        segments.append(f'{base_open}{text[last_pos:]}</font>')

    combined_text = ''.join(segments)

    # Restore bold tags; the placeholders sit inside a DejaVuSans run, so
    # the font tag is closed before <b>/</b> and reopened after it.
    combined_text = combined_text.replace('###BOLD_START###', f'</font><b>{base_open}')
    combined_text = combined_text.replace('###BOLD_END###', f'</font></b>{base_open}')

    # Restore links
    for i, (url, label) in enumerate(links):
        parts = combined_text.split(f"###LINK_{i}###")
        if len(parts) != 2:
            # Placeholder missing or ambiguous: leave the text untouched.
            continue
        before, after = parts
        link_html = f'<a href="{url}">{label}</a>'
        if before.rfind('<font') > before.rfind('</font>'):
            # Inside an open font tag: close it before the link, reopen after.
            link_html = f'</font>{link_html}{base_open}'
        combined_text = before + link_html + after

    return combined_text
189
-
190
- # 📝 Converting markdown to PDF content, because PDFs never go out of style
191
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
    """📋 Convert markdown text into a list of marked-up lines for the PDF.

    Blank lines and lines starting with ``# `` are dropped. Every kept
    line has its links converted by ``detect_and_convert_links``.

    Args:
        markdown_text: The raw markdown source.
        render_with_bold: When true, ``**text**`` becomes ``<b>text</b>``.
        auto_bold_numbered: not a parameter; see ``auto_bold_numbers``.
        auto_bold_numbers: When true, lines starting with ``<digits>. ``
            are wrapped in a single ``<b>...</b>`` pair (inner bold tags
            are removed first when both an opening and closing tag exist).

    Returns:
        A ``(lines, count)`` tuple where ``count == len(lines)``.
    """
    numbered = re.compile(r'^\d+\.\s')
    bold_markers = re.compile(r'\*\*(.*?)\*\*')
    pdf_content = []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('# '):
            continue

        # Links are processed before any other formatting.
        line = detect_and_convert_links(line)

        if render_with_bold:
            line = bold_markers.sub(r'<b>\1</b>', line)

        already_bold = line.startswith("<b>") and line.endswith("</b>")
        if auto_bold_numbers and numbered.match(line) and not already_bold:
            if "<b>" in line and "</b>" in line:
                # Partial bold inside the line: flatten it so the whole
                # line ends up in exactly one bold pair.
                line = re.sub(r'</?b>', '', line)
            line = f"<b>{line}</b>"

        pdf_content.append(line)

    return pdf_content, len(pdf_content)
225
-
226
- # 🏗️ Building PDFs like it's your second job
227
def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns):
    """🔨 Render markdown into a multi-column, double-width A4 PDF.

    The markdown is converted to marked-up lines, distributed evenly over
    ``num_columns`` columns, and laid out as one borderless table with a
    thin grey rule after each column.

    Args:
        markdown_text: The raw markdown source.
        base_font_size: Font size in points for normal items.
        render_with_bold: Forwarded to ``markdown_to_pdf_content``.
        auto_bold_numbers: Forwarded to ``markdown_to_pdf_content``.
        enlarge_numbered: When true, numbered bold lines are one point larger.
        num_columns: Number of columns; values below 1 are treated as 1.

    Returns:
        The PDF as ``bytes``, or ``None`` after reporting the problem with
        ``st.error`` when no ``.ttf`` files are found or font registration
        fails.
    """
    # A zero or negative column count would raise IndexError when the first
    # item is placed, so clamp it once here.
    num_columns = max(1, int(num_columns))

    # Register fonts
    emoji_font_name = "DejaVuSans"
    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found in the current directory.")
            return None
        selected_font_path = next(
            (f for f in available_font_files if "NotoEmoji-Bold" in f), None
        )
        if selected_font_path:
            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
            emoji_font_name = "NotoEmoji-Bold"
        # Without an emoji font file, emoji_font_name stays "DejaVuSans" so
        # no style or <font> tag refers to a face that was never registered.
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None

    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)

    # Define styles for different text types
    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=base_font_size, leading=base_font_size * 1.15, spaceAfter=1,
        linkUnderline=True  # Enable underline for links
    )
    numbered_size = base_font_size + 1 if enlarge_numbered else base_font_size
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=emoji_font_name,
        fontSize=numbered_size, leading=numbered_size * 1.15, spaceAfter=1,
        linkUnderline=True  # Enable underline for links
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=base_font_size * 1.1, leading=base_font_size * 1.32, spaceAfter=2,
        linkUnderline=True  # Enable underline for links
    )

    # Distribute content across columns: move to the next column once the
    # current one holds its even share; the last column takes the remainder.
    columns = [[] for _ in range(num_columns)]
    lines_per_column = total_lines / num_columns
    current_line_count = 0
    current_column = 0
    number_pattern = re.compile(r'^\d+\.\s')

    for item in pdf_content:
        if current_line_count >= lines_per_column and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    # Format columns into Paragraph objects
    column_cells = [[] for _ in range(num_columns)]
    for col_idx, column in enumerate(columns):
        for item in column:
            if isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>"):
                content = item[3:-4].strip()
                style = numbered_bold_style if number_pattern.match(content) else section_style
                column_cells[col_idx].append(Paragraph(apply_emoji_font(content, emoji_font_name), style))
            else:
                column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))

    # Ensure columns have the same number of cells. At least one row is
    # kept so that empty markdown still produces a valid (blank) table, and
    # each padding cell gets its own Paragraph rather than one shared
    # flowable instance repeated across cells.
    max_cells = max(1, max(len(cells) for cells in column_cells))
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

    # Create the table layout
    col_width = (page_width - 72) / num_columns
    table_data = list(zip(*column_cells))
    table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0, colors.white),
        ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
        ('LEFTPADDING', (0, 0), (-1, -1), 2),
        ('RIGHTPADDING', (0, 0), (-1, -1), 2),
        ('TOPPADDING', (0, 0), (-1, -1), 1),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
    ]))

    # Build the PDF
    story = [Spacer(1, spacer_height), table]
    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
326
-
327
- # 🖼️ Converting PDFs to images, because we can't leave well enough alone
328
def pdf_to_image(pdf_bytes):
    """🔎 Render every page of a PDF to a PIL image at 2x zoom.

    Args:
        pdf_bytes: The PDF document as bytes.

    Returns:
        A list with one RGB ``PIL.Image`` per page, or ``None`` after
        reporting the failure with ``st.error`` when the PDF cannot be
        opened or rendered.
    """
    try:
        # The context manager closes the document even when rendering a
        # page raises, so a failed preview does not leak the handle.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            zoom = fitz.Matrix(2.0, 2.0)
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=zoom)
                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
            return images
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
342
-
343
- # -- Markdown File Selection --
344
# Every markdown file in the working directory except README.md is offered
# for editing; the option shown to the user is the file name without ".md".
md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]

# 🎪 The main Streamlit show begins here
with st.sidebar:
    st.markdown("### PDF Options")
    if md_options:
        selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    else:
        st.warning("No markdown file found. Please add one to your folder.")
        selected_md = None
        st.session_state.markdown_content = ""

    # Map font name (file name without extension) -> path for every .ttf here.
    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    font_names = list(available_font_files.keys())
    selected_font_name = st.selectbox("Select Emoji Font", options=font_names,
                                      index=font_names.index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0)
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
    num_columns = st.selectbox("Number of Columns", options=[1, 2, 3, 4, 5, 6], index=3)

    # Use the file's content for editing. The widget key includes the file,
    # font and column choice, so changing any of them yields a fresh editor.
    edited_markdown = st.text_area("Modify the markdown content below:", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")
    if st.button("Update PDF"):
        st.session_state.markdown_content = edited_markdown
        if selected_md:
            # Persist the edit, since the file is re-read on every rerun.
            with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                f.write(edited_markdown)
        st.rerun()

    # Create a timestamp prefix for file naming
    prefix = get_timestamp_prefix()

    # Download button for Markdown with new naming convention and double emoji label
    st.download_button(
        label="💾📝 Save Markdown",
        data=st.session_state.markdown_content,
        file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
        mime="text/markdown"
    )

    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        # Clean markdown input for speech generation
        cleaned_text = clean_for_speech(st.session_state.markdown_content)
        if not cleaned_text.strip():
            # Nothing speakable remains (e.g. no markdown file was found),
            # so skip the TTS request instead of sending empty text.
            st.warning("Nothing to read aloud: the markdown content is empty.")
        else:
            # Create a filename for the audio file using the timestamp, markdown name, and selected voice
            audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
            audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
            st.audio(audio_file)
            with open(audio_file, "rb") as f:
                audio_bytes = f.read()
            st.download_button(
                label="💾🔊 Save Audio",
                data=audio_bytes,
                file_name=audio_filename,
                mime="audio/mpeg"
            )
406
-
407
- # 🚀 Generating the PDF with more complexity than a rocket launch
408
with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns)

# create_pdf returns None when it could not produce a document, having
# already reported the reason. Only offer a preview and a download when
# there are actual PDF bytes, since the download button cannot take None.
if pdf_bytes:
    # 📺 Displaying the preview, because everyone loves to window shop
    with st.container():
        pdf_images = pdf_to_image(pdf_bytes)
        if pdf_images:
            for img in pdf_images:
                st.image(img, use_container_width=True)
        else:
            st.info("Download the PDF to view it locally.")

    # 💾 Last chance to save your masterpiece before it's gone forever
    with st.sidebar:
        st.download_button(
            label="💾📄 Save PDF",
            data=pdf_bytes,
            file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
            mime="application/pdf"
        )
else:
    st.warning("PDF generation failed; nothing to preview or download.")