Spaces:
Running
Running
Format Markdown-like bold & italics in text
Browse files- helpers/pptx_helper.py +87 -59
helpers/pptx_helper.py
CHANGED
@@ -52,6 +52,7 @@ FOREGROUND_IMAGE_PROBABILITY = 0.8
|
|
52 |
|
53 |
SLIDE_NUMBER_REGEX = re.compile(r"^slide[ ]+\d+:", re.IGNORECASE)
|
54 |
ICONS_REGEX = re.compile(r"\[\[(.*?)\]\]\s*(.*)")
|
|
|
55 |
|
56 |
ICON_COLORS = [
|
57 |
pptx.dml.color.RGBColor.from_string('800000'), # Maroon
|
@@ -82,6 +83,61 @@ def remove_slide_number_from_heading(header: str) -> str:
|
|
82 |
return header
|
83 |
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
def generate_powerpoint_presentation(
|
86 |
parsed_data: dict,
|
87 |
slides_template: str,
|
@@ -280,16 +336,8 @@ def _handle_default_display(
|
|
280 |
# The bullet_points may contain a nested hierarchy of JSON arrays
|
281 |
# In some scenarios, it may contain objects (dictionaries) because the LLM generated so
|
282 |
# ^ The second scenario is not covered
|
283 |
-
|
284 |
flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
|
285 |
-
|
286 |
-
for idx, an_item in enumerate(flat_items_list):
|
287 |
-
if idx == 0:
|
288 |
-
text_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
|
289 |
-
else:
|
290 |
-
paragraph = text_frame.add_paragraph()
|
291 |
-
paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
|
292 |
-
paragraph.level = an_item[1]
|
293 |
|
294 |
_handle_key_message(
|
295 |
the_slide=slide,
|
@@ -345,14 +393,7 @@ def _handle_display_image__in_foreground(
|
|
345 |
text_col: SlidePlaceholder = slide.shapes.placeholders[idx]
|
346 |
|
347 |
flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
|
348 |
-
|
349 |
-
for idx, an_item in enumerate(flat_items_list):
|
350 |
-
if idx == 0:
|
351 |
-
text_col.text_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
|
352 |
-
else:
|
353 |
-
paragraph = text_col.text_frame.add_paragraph()
|
354 |
-
paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
|
355 |
-
paragraph.level = an_item[1]
|
356 |
|
357 |
if not img_keywords:
|
358 |
# No keywords, so no image search and addition
|
@@ -418,14 +459,7 @@ def _handle_display_image__in_background(
|
|
418 |
title_shape.text = remove_slide_number_from_heading(slide_json['heading'])
|
419 |
|
420 |
flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
|
421 |
-
|
422 |
-
for idx, an_item in enumerate(flat_items_list):
|
423 |
-
if idx == 0:
|
424 |
-
body_shape.text_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
|
425 |
-
else:
|
426 |
-
paragraph = body_shape.text_frame.add_paragraph()
|
427 |
-
paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
|
428 |
-
paragraph.level = an_item[1]
|
429 |
|
430 |
if not img_keywords:
|
431 |
# No keywords, so no image search and addition
|
@@ -537,7 +571,6 @@ def _handle_icons_ideas(
|
|
537 |
|
538 |
# Set the icon's background shape color
|
539 |
shape.fill.fore_color.rgb = shape.line.color.rgb = random.choice(ICON_COLORS)
|
540 |
-
|
541 |
# Add the icon image on top of the colored shape
|
542 |
slide.shapes.add_picture(icon_path, left, top, height=ICON_SIZE)
|
543 |
|
@@ -550,9 +583,9 @@ def _handle_icons_ideas(
|
|
550 |
height=text_box_size
|
551 |
)
|
552 |
text_frame = text_box.text_frame
|
553 |
-
text_frame.text = accompanying_text
|
554 |
text_frame.word_wrap = True
|
555 |
text_frame.paragraphs[0].alignment = pptx.enum.text.PP_ALIGN.CENTER
|
|
|
556 |
|
557 |
# Center the text vertically
|
558 |
text_frame.vertical_anchor = pptx.enum.text.MSO_ANCHOR.MIDDLE
|
@@ -685,13 +718,7 @@ def _handle_double_col_layout(
|
|
685 |
if not left_heading:
|
686 |
left_col_frame.text = double_col_content[0]['heading']
|
687 |
|
688 |
-
|
689 |
-
if left_heading and idx == 0:
|
690 |
-
left_col_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
|
691 |
-
else:
|
692 |
-
paragraph = left_col_frame.add_paragraph()
|
693 |
-
paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
|
694 |
-
paragraph.level = an_item[1]
|
695 |
|
696 |
if 'heading' in double_col_content[1] and right_heading:
|
697 |
right_heading.text = double_col_content[1]['heading']
|
@@ -703,13 +730,7 @@ def _handle_double_col_layout(
|
|
703 |
if not right_heading:
|
704 |
right_col_frame.text = double_col_content[1]['heading']
|
705 |
|
706 |
-
|
707 |
-
if right_col_frame and idx == 0:
|
708 |
-
right_col_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
|
709 |
-
else:
|
710 |
-
paragraph = right_col_frame.add_paragraph()
|
711 |
-
paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
|
712 |
-
paragraph.level = an_item[1]
|
713 |
|
714 |
_handle_key_message(
|
715 |
the_slide=slide,
|
@@ -792,7 +813,11 @@ def _handle_step_by_step_process(
|
|
792 |
|
793 |
for step in steps:
|
794 |
shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.CHEVRON, left, top, width, height)
|
795 |
-
|
|
|
|
|
|
|
|
|
796 |
left += width - INCHES_0_4
|
797 |
elif 4 < n_steps <= 6:
|
798 |
# Vertical display
|
@@ -817,7 +842,11 @@ def _handle_step_by_step_process(
|
|
817 |
|
818 |
for step in steps:
|
819 |
shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.PENTAGON, left, top, width, height)
|
820 |
-
|
|
|
|
|
|
|
|
|
821 |
top += height + INCHES_0_3
|
822 |
left += INCHES_0_5
|
823 |
|
@@ -851,7 +880,7 @@ def _handle_key_message(
|
|
851 |
width=width,
|
852 |
height=height
|
853 |
)
|
854 |
-
shape.
|
855 |
|
856 |
|
857 |
def _get_slide_width_height_inches(presentation: pptx.Presentation) -> Tuple[float, float]:
|
@@ -864,7 +893,6 @@ def _get_slide_width_height_inches(presentation: pptx.Presentation) -> Tuple[flo
|
|
864 |
|
865 |
slide_width_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_width
|
866 |
slide_height_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_height
|
867 |
-
# logger.debug('Slide width: %f, height: %f', slide_width_inch, slide_height_inch)
|
868 |
|
869 |
return slide_width_inch, slide_height_inch
|
870 |
|
@@ -877,7 +905,7 @@ if __name__ == '__main__':
|
|
877 |
{
|
878 |
"heading": "Introduction to AI Applications",
|
879 |
"bullet_points": [
|
880 |
-
"Artificial Intelligence (AI) is transforming various industries",
|
881 |
"AI applications range from simple decision-making tools to complex systems",
|
882 |
"AI can be categorized into types: Rule-based, Instance-based, and Model-based"
|
883 |
],
|
@@ -887,9 +915,9 @@ if __name__ == '__main__':
|
|
887 |
{
|
888 |
"heading": "AI in Everyday Life",
|
889 |
"bullet_points": [
|
890 |
-
"Virtual assistants like Siri, Alexa, and Google Assistant",
|
891 |
-
"Recommender systems in Netflix, Amazon, and Spotify",
|
892 |
-
"Fraud detection in banking and credit card transactions"
|
893 |
],
|
894 |
"key_message": "AI is integrated into our daily lives through various services",
|
895 |
"img_keywords": "virtual assistants, recommender systems, fraud detection"
|
@@ -939,11 +967,11 @@ if __name__ == '__main__':
|
|
939 |
{
|
940 |
"heading": "Step-by-Step: AI Development Process",
|
941 |
"bullet_points": [
|
942 |
-
">> Define the problem and objectives",
|
943 |
-
">> Collect and preprocess data",
|
944 |
-
">> Select and train the AI model",
|
945 |
-
">> Evaluate and optimize the model",
|
946 |
-
">> Deploy and monitor the AI system"
|
947 |
],
|
948 |
"key_message": "Developing AI involves a structured process from problem definition to deployment",
|
949 |
"img_keywords": ""
|
@@ -951,11 +979,11 @@ if __name__ == '__main__':
|
|
951 |
{
|
952 |
"heading": "AI Icons: Key Aspects",
|
953 |
"bullet_points": [
|
954 |
-
"[[brain]] Human-like intelligence and decision-making",
|
955 |
-
"[[robot]] Automation and physical tasks",
|
956 |
"[[]] Data processing and cloud computing",
|
957 |
-
"[[lightbulb]] Insights and predictions",
|
958 |
-
"[[globe2]] Global connectivity and impact"
|
959 |
],
|
960 |
"key_message": "AI encompasses various aspects, from human-like intelligence to global impact",
|
961 |
"img_keywords": "AI aspects, intelligence, automation, data processing, global impact"
|
@@ -968,7 +996,7 @@ if __name__ == '__main__':
|
|
968 |
"Invest in AI education and workforce development",
|
969 |
"Call to action: Explore AI applications and contribute to shaping its future"
|
970 |
],
|
971 |
-
"key_message": "AI offers immense potential
|
972 |
"img_keywords": "AI transformation, ethical considerations, AI education, future of AI"
|
973 |
}
|
974 |
]
|
|
|
52 |
|
53 |
SLIDE_NUMBER_REGEX = re.compile(r"^slide[ ]+\d+:", re.IGNORECASE)
|
54 |
ICONS_REGEX = re.compile(r"\[\[(.*?)\]\]\s*(.*)")
|
55 |
+
BOLD_ITALICS_PATTERN = re.compile(r'(\*\*(.*?)\*\*|\*(.*?)\*)')
|
56 |
|
57 |
ICON_COLORS = [
|
58 |
pptx.dml.color.RGBColor.from_string('800000'), # Maroon
|
|
|
83 |
return header
|
84 |
|
85 |
|
86 |
+
def add_bulleted_items(text_frame: pptx.text.text.TextFrame, flat_items_list: list):
    """
    Add a list of texts as bullet points and apply formatting.

    Each item is indexed as ``item[0]`` (the text to display) and ``item[1]``
    (the indentation level assigned to the paragraph). The step-by-step marker
    prefix, if present, is stripped from the text, and Markdown-like bold/italics
    are rendered via `format_text`.

    The first item reuses the frame's first paragraph only when that paragraph is
    still empty. Some callers write a heading into the frame before calling this
    function; in that case every item, including the first one, goes into a new
    paragraph so that the heading and the first bullet are not merged.

    :param text_frame: The text frame where text is to be displayed.
    :param flat_items_list: The list of items to be displayed.
    """

    for idx, an_item in enumerate(flat_items_list):
        first_paragraph = text_frame.paragraphs[0]

        if idx == 0 and not first_paragraph.text:
            # Empty frame: fill the paragraph that already exists
            paragraph = first_paragraph
        else:
            paragraph = text_frame.add_paragraph()
            paragraph.level = an_item[1]

        format_text(paragraph, an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER))
|
102 |
+
|
103 |
+
|
104 |
+
def format_text(frame_paragraph, text: str) -> None:
    """
    Apply bold and italic formatting while preserving the original word order
    without duplication.

    The text is split on Markdown-like markers: ``**bold**`` becomes a bold run,
    ``*italic*`` becomes an italic run, and everything in between is added as
    plain runs. Markers that enclose nothing (e.g., the ``**`` in ``2 ** 3``)
    are kept as literal text so that no characters are lost.

    :param frame_paragraph: The paragraph to which the runs are appended.
    :param text: The text that may contain bold/italics markers.
    """

    last_index = 0  # Track position in the text

    # Group 0: Full match (e.g., **bold** or *italic*)
    # Group 1: The outer parentheses (captures either bold or italic match, because of |)
    # Group 2: The bold text inside **bold**
    # Group 3: The italic text inside *italic*
    for match in BOLD_ITALICS_PATTERN.finditer(text):
        start, end = match.span()

        # Add unformatted text before the formatted section
        if start > last_index:
            frame_paragraph.add_run().text = text[last_index:start]

        bold_text = match.group(2)
        italic_text = match.group(3)
        run = frame_paragraph.add_run()

        if bold_text:
            run.text = bold_text
            run.font.bold = True
        elif italic_text:
            run.text = italic_text
            run.font.italic = True
        else:
            # Nothing between the markers: keep the matched characters verbatim
            # instead of silently dropping them from the slide text
            run.text = match.group(0)

        last_index = end  # Update position

    # Add any remaining unformatted text
    if last_index < len(text):
        frame_paragraph.add_run().text = text[last_index:]
|
139 |
+
|
140 |
+
|
141 |
def generate_powerpoint_presentation(
|
142 |
parsed_data: dict,
|
143 |
slides_template: str,
|
|
|
336 |
# The bullet_points may contain a nested hierarchy of JSON arrays
|
337 |
# In some scenarios, it may contain objects (dictionaries) because the LLM generated so
|
338 |
# ^ The second scenario is not covered
|
|
|
339 |
flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
|
340 |
+
add_bulleted_items(text_frame, flat_items_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
|
342 |
_handle_key_message(
|
343 |
the_slide=slide,
|
|
|
393 |
text_col: SlidePlaceholder = slide.shapes.placeholders[idx]
|
394 |
|
395 |
flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
|
396 |
+
add_bulleted_items(text_col.text_frame, flat_items_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
|
398 |
if not img_keywords:
|
399 |
# No keywords, so no image search and addition
|
|
|
459 |
title_shape.text = remove_slide_number_from_heading(slide_json['heading'])
|
460 |
|
461 |
flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
|
462 |
+
add_bulleted_items(body_shape.text_frame, flat_items_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
463 |
|
464 |
if not img_keywords:
|
465 |
# No keywords, so no image search and addition
|
|
|
571 |
|
572 |
# Set the icon's background shape color
|
573 |
shape.fill.fore_color.rgb = shape.line.color.rgb = random.choice(ICON_COLORS)
|
|
|
574 |
# Add the icon image on top of the colored shape
|
575 |
slide.shapes.add_picture(icon_path, left, top, height=ICON_SIZE)
|
576 |
|
|
|
583 |
height=text_box_size
|
584 |
)
|
585 |
text_frame = text_box.text_frame
|
|
|
586 |
text_frame.word_wrap = True
|
587 |
text_frame.paragraphs[0].alignment = pptx.enum.text.PP_ALIGN.CENTER
|
588 |
+
format_text(text_frame.paragraphs[0], accompanying_text)
|
589 |
|
590 |
# Center the text vertically
|
591 |
text_frame.vertical_anchor = pptx.enum.text.MSO_ANCHOR.MIDDLE
|
|
|
718 |
if not left_heading:
|
719 |
left_col_frame.text = double_col_content[0]['heading']
|
720 |
|
721 |
+
add_bulleted_items(left_col_frame, flat_items_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
722 |
|
723 |
if 'heading' in double_col_content[1] and right_heading:
|
724 |
right_heading.text = double_col_content[1]['heading']
|
|
|
730 |
if not right_heading:
|
731 |
right_col_frame.text = double_col_content[1]['heading']
|
732 |
|
733 |
+
add_bulleted_items(right_col_frame, flat_items_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
734 |
|
735 |
_handle_key_message(
|
736 |
the_slide=slide,
|
|
|
813 |
|
814 |
for step in steps:
|
815 |
shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.CHEVRON, left, top, width, height)
|
816 |
+
text_frame = shape.text_frame
|
817 |
+
text_frame.clear()
|
818 |
+
paragraph = text_frame.paragraphs[0]
|
819 |
+
paragraph.alignment = pptx.enum.text.PP_ALIGN.LEFT
|
820 |
+
format_text(paragraph, step.removeprefix(STEP_BY_STEP_PROCESS_MARKER))
|
821 |
left += width - INCHES_0_4
|
822 |
elif 4 < n_steps <= 6:
|
823 |
# Vertical display
|
|
|
842 |
|
843 |
for step in steps:
|
844 |
shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.PENTAGON, left, top, width, height)
|
845 |
+
text_frame = shape.text_frame
|
846 |
+
text_frame.clear()
|
847 |
+
paragraph = text_frame.paragraphs[0]
|
848 |
+
paragraph.alignment = pptx.enum.text.PP_ALIGN.LEFT
|
849 |
+
format_text(paragraph, step.removeprefix(STEP_BY_STEP_PROCESS_MARKER))
|
850 |
top += height + INCHES_0_3
|
851 |
left += INCHES_0_5
|
852 |
|
|
|
880 |
width=width,
|
881 |
height=height
|
882 |
)
|
883 |
+
format_text(shape.text_frame.paragraphs[0], slide_json['key_message'])
|
884 |
|
885 |
|
886 |
def _get_slide_width_height_inches(presentation: pptx.Presentation) -> Tuple[float, float]:
|
|
|
893 |
|
894 |
slide_width_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_width
|
895 |
slide_height_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_height
|
|
|
896 |
|
897 |
return slide_width_inch, slide_height_inch
|
898 |
|
|
|
905 |
{
|
906 |
"heading": "Introduction to AI Applications",
|
907 |
"bullet_points": [
|
908 |
+
"Artificial Intelligence (AI) is *transforming* various industries",
|
909 |
"AI applications range from simple decision-making tools to complex systems",
|
910 |
"AI can be categorized into types: Rule-based, Instance-based, and Model-based"
|
911 |
],
|
|
|
915 |
{
|
916 |
"heading": "AI in Everyday Life",
|
917 |
"bullet_points": [
|
918 |
+
"**Virtual assistants** like Siri, Alexa, and Google Assistant",
|
919 |
+
"**Recommender systems** in Netflix, Amazon, and Spotify",
|
920 |
+
"**Fraud detection** in banking and *credit card* transactions"
|
921 |
],
|
922 |
"key_message": "AI is integrated into our daily lives through various services",
|
923 |
"img_keywords": "virtual assistants, recommender systems, fraud detection"
|
|
|
967 |
{
|
968 |
"heading": "Step-by-Step: AI Development Process",
|
969 |
"bullet_points": [
|
970 |
+
">> **Step 1:** Define the problem and objectives",
|
971 |
+
">> **Step 2:** Collect and preprocess data",
|
972 |
+
">> **Step 3:** Select and train the AI model",
|
973 |
+
">> **Step 4:** Evaluate and optimize the model",
|
974 |
+
">> **Step 5:** Deploy and monitor the AI system"
|
975 |
],
|
976 |
"key_message": "Developing AI involves a structured process from problem definition to deployment",
|
977 |
"img_keywords": ""
|
|
|
979 |
{
|
980 |
"heading": "AI Icons: Key Aspects",
|
981 |
"bullet_points": [
|
982 |
+
"[[brain]] Human-like *intelligence* and decision-making",
|
983 |
+
"[[robot]] Automation and physical *tasks*",
|
984 |
"[[]] Data processing and cloud computing",
|
985 |
+
"[[lightbulb]] Insights and *predictions*",
|
986 |
+
"[[globe2]] Global connectivity and *impact*"
|
987 |
],
|
988 |
"key_message": "AI encompasses various aspects, from human-like intelligence to global impact",
|
989 |
"img_keywords": "AI aspects, intelligence, automation, data processing, global impact"
|
|
|
996 |
"Invest in AI education and workforce development",
|
997 |
"Call to action: Explore AI applications and contribute to shaping its future"
|
998 |
],
|
999 |
+
"key_message": "AI offers *immense potential*, and we must embrace it responsibly",
|
1000 |
"img_keywords": "AI transformation, ethical considerations, AI education, future of AI"
|
1001 |
}
|
1002 |
]
|