barunsaha committed on
Commit
7d37c8b
·
1 Parent(s): b1006c1

Format Markdown-like bold & italics in text

Browse files
Files changed (1) hide show
  1. helpers/pptx_helper.py +87 -59
helpers/pptx_helper.py CHANGED
@@ -52,6 +52,7 @@ FOREGROUND_IMAGE_PROBABILITY = 0.8
52
 
53
  SLIDE_NUMBER_REGEX = re.compile(r"^slide[ ]+\d+:", re.IGNORECASE)
54
  ICONS_REGEX = re.compile(r"\[\[(.*?)\]\]\s*(.*)")
 
55
 
56
  ICON_COLORS = [
57
  pptx.dml.color.RGBColor.from_string('800000'), # Maroon
@@ -82,6 +83,61 @@ def remove_slide_number_from_heading(header: str) -> str:
82
  return header
83
 
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  def generate_powerpoint_presentation(
86
  parsed_data: dict,
87
  slides_template: str,
@@ -280,16 +336,8 @@ def _handle_default_display(
280
  # The bullet_points may contain a nested hierarchy of JSON arrays
281
  # In some scenarios, it may contain objects (dictionaries) because the LLM generated so
282
  # ^ The second scenario is not covered
283
-
284
  flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
285
-
286
- for idx, an_item in enumerate(flat_items_list):
287
- if idx == 0:
288
- text_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
289
- else:
290
- paragraph = text_frame.add_paragraph()
291
- paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
292
- paragraph.level = an_item[1]
293
 
294
  _handle_key_message(
295
  the_slide=slide,
@@ -345,14 +393,7 @@ def _handle_display_image__in_foreground(
345
  text_col: SlidePlaceholder = slide.shapes.placeholders[idx]
346
 
347
  flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
348
-
349
- for idx, an_item in enumerate(flat_items_list):
350
- if idx == 0:
351
- text_col.text_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
352
- else:
353
- paragraph = text_col.text_frame.add_paragraph()
354
- paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
355
- paragraph.level = an_item[1]
356
 
357
  if not img_keywords:
358
  # No keywords, so no image search and addition
@@ -418,14 +459,7 @@ def _handle_display_image__in_background(
418
  title_shape.text = remove_slide_number_from_heading(slide_json['heading'])
419
 
420
  flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
421
-
422
- for idx, an_item in enumerate(flat_items_list):
423
- if idx == 0:
424
- body_shape.text_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
425
- else:
426
- paragraph = body_shape.text_frame.add_paragraph()
427
- paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
428
- paragraph.level = an_item[1]
429
 
430
  if not img_keywords:
431
  # No keywords, so no image search and addition
@@ -537,7 +571,6 @@ def _handle_icons_ideas(
537
 
538
  # Set the icon's background shape color
539
  shape.fill.fore_color.rgb = shape.line.color.rgb = random.choice(ICON_COLORS)
540
-
541
  # Add the icon image on top of the colored shape
542
  slide.shapes.add_picture(icon_path, left, top, height=ICON_SIZE)
543
 
@@ -550,9 +583,9 @@ def _handle_icons_ideas(
550
  height=text_box_size
551
  )
552
  text_frame = text_box.text_frame
553
- text_frame.text = accompanying_text
554
  text_frame.word_wrap = True
555
  text_frame.paragraphs[0].alignment = pptx.enum.text.PP_ALIGN.CENTER
 
556
 
557
  # Center the text vertically
558
  text_frame.vertical_anchor = pptx.enum.text.MSO_ANCHOR.MIDDLE
@@ -685,13 +718,7 @@ def _handle_double_col_layout(
685
  if not left_heading:
686
  left_col_frame.text = double_col_content[0]['heading']
687
 
688
- for idx, an_item in enumerate(flat_items_list):
689
- if left_heading and idx == 0:
690
- left_col_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
691
- else:
692
- paragraph = left_col_frame.add_paragraph()
693
- paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
694
- paragraph.level = an_item[1]
695
 
696
  if 'heading' in double_col_content[1] and right_heading:
697
  right_heading.text = double_col_content[1]['heading']
@@ -703,13 +730,7 @@ def _handle_double_col_layout(
703
  if not right_heading:
704
  right_col_frame.text = double_col_content[1]['heading']
705
 
706
- for idx, an_item in enumerate(flat_items_list):
707
- if right_col_frame and idx == 0:
708
- right_col_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
709
- else:
710
- paragraph = right_col_frame.add_paragraph()
711
- paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
712
- paragraph.level = an_item[1]
713
 
714
  _handle_key_message(
715
  the_slide=slide,
@@ -792,7 +813,11 @@ def _handle_step_by_step_process(
792
 
793
  for step in steps:
794
  shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.CHEVRON, left, top, width, height)
795
- shape.text = step.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
 
 
 
 
796
  left += width - INCHES_0_4
797
  elif 4 < n_steps <= 6:
798
  # Vertical display
@@ -817,7 +842,11 @@ def _handle_step_by_step_process(
817
 
818
  for step in steps:
819
  shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.PENTAGON, left, top, width, height)
820
- shape.text = step.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
 
 
 
 
821
  top += height + INCHES_0_3
822
  left += INCHES_0_5
823
 
@@ -851,7 +880,7 @@ def _handle_key_message(
851
  width=width,
852
  height=height
853
  )
854
- shape.text = slide_json['key_message']
855
 
856
 
857
  def _get_slide_width_height_inches(presentation: pptx.Presentation) -> Tuple[float, float]:
@@ -864,7 +893,6 @@ def _get_slide_width_height_inches(presentation: pptx.Presentation) -> Tuple[flo
864
 
865
  slide_width_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_width
866
  slide_height_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_height
867
- # logger.debug('Slide width: %f, height: %f', slide_width_inch, slide_height_inch)
868
 
869
  return slide_width_inch, slide_height_inch
870
 
@@ -877,7 +905,7 @@ if __name__ == '__main__':
877
  {
878
  "heading": "Introduction to AI Applications",
879
  "bullet_points": [
880
- "Artificial Intelligence (AI) is transforming various industries",
881
  "AI applications range from simple decision-making tools to complex systems",
882
  "AI can be categorized into types: Rule-based, Instance-based, and Model-based"
883
  ],
@@ -887,9 +915,9 @@ if __name__ == '__main__':
887
  {
888
  "heading": "AI in Everyday Life",
889
  "bullet_points": [
890
- "Virtual assistants like Siri, Alexa, and Google Assistant",
891
- "Recommender systems in Netflix, Amazon, and Spotify",
892
- "Fraud detection in banking and credit card transactions"
893
  ],
894
  "key_message": "AI is integrated into our daily lives through various services",
895
  "img_keywords": "virtual assistants, recommender systems, fraud detection"
@@ -939,11 +967,11 @@ if __name__ == '__main__':
939
  {
940
  "heading": "Step-by-Step: AI Development Process",
941
  "bullet_points": [
942
- ">> Define the problem and objectives",
943
- ">> Collect and preprocess data",
944
- ">> Select and train the AI model",
945
- ">> Evaluate and optimize the model",
946
- ">> Deploy and monitor the AI system"
947
  ],
948
  "key_message": "Developing AI involves a structured process from problem definition to deployment",
949
  "img_keywords": ""
@@ -951,11 +979,11 @@ if __name__ == '__main__':
951
  {
952
  "heading": "AI Icons: Key Aspects",
953
  "bullet_points": [
954
- "[[brain]] Human-like intelligence and decision-making",
955
- "[[robot]] Automation and physical tasks",
956
  "[[]] Data processing and cloud computing",
957
- "[[lightbulb]] Insights and predictions",
958
- "[[globe2]] Global connectivity and impact"
959
  ],
960
  "key_message": "AI encompasses various aspects, from human-like intelligence to global impact",
961
  "img_keywords": "AI aspects, intelligence, automation, data processing, global impact"
@@ -968,7 +996,7 @@ if __name__ == '__main__':
968
  "Invest in AI education and workforce development",
969
  "Call to action: Explore AI applications and contribute to shaping its future"
970
  ],
971
- "key_message": "AI offers immense potential, and we must embrace it responsibly",
972
  "img_keywords": "AI transformation, ethical considerations, AI education, future of AI"
973
  }
974
  ]
 
52
 
53
  SLIDE_NUMBER_REGEX = re.compile(r"^slide[ ]+\d+:", re.IGNORECASE)
54
  ICONS_REGEX = re.compile(r"\[\[(.*?)\]\]\s*(.*)")
55
+ BOLD_ITALICS_PATTERN = re.compile(r'(\*\*(.*?)\*\*|\*(.*?)\*)')
56
 
57
  ICON_COLORS = [
58
  pptx.dml.color.RGBColor.from_string('800000'), # Maroon
 
83
  return header
84
 
85
 
86
def add_bulleted_items(text_frame: pptx.text.text.TextFrame, flat_items_list: list) -> None:
    """
    Add a list of texts as bullet points and apply formatting.

    Each item is written into its own paragraph. The first item reuses the text frame's
    first paragraph only when that paragraph is still empty; if the caller has already
    put text there (e.g., a column heading), the first item goes into a new paragraph so
    that it is not appended to the existing text.

    :param text_frame: The text frame where text is to be displayed.
    :param flat_items_list: The list of items to be displayed. Each item is indexable:
        element 0 is the text and element 1 is the indentation level.
    """

    for idx, an_item in enumerate(flat_items_list):
        first_paragraph = text_frame.paragraphs[0]

        if idx == 0 and not first_paragraph.text:
            # The frame is still blank: fill its existing first paragraph
            paragraph = first_paragraph
        else:
            # Either a subsequent item, or the first paragraph already holds text
            # that must not be merged with this item
            paragraph = text_frame.add_paragraph()
            paragraph.level = an_item[1]

        format_text(paragraph, an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER))
102
+
103
+
104
def format_text(frame_paragraph, text: str) -> None:
    """
    Append text to a paragraph as runs, rendering Markdown-like `**bold**` and `*italic*`
    markers as bold and italic font formatting.

    Text outside the markers is added as plain runs, preserving the original word order
    without duplication. A marker pair with nothing between it (e.g., a bare `**`) is
    kept as literal text instead of being silently dropped.

    :param frame_paragraph: The paragraph to which the runs are appended.
    :param text: The text, possibly containing bold/italics markers.
    """

    last_index = 0  # End position of the previous match in the text

    # Group 0: Full match (e.g., **bold** or *italic*)
    # Group 1: The outer parentheses (captures either bold or italic match, because of |)
    # Group 2: The bold text inside **bold**
    # Group 3: The italic text inside *italic*
    for match in BOLD_ITALICS_PATTERN.finditer(text):
        start, end = match.span()

        # Add unformatted text before the formatted section
        if start > last_index:
            run = frame_paragraph.add_run()
            run.text = text[last_index:start]

        bold_text = match.group(2)
        italic_text = match.group(3)
        run = frame_paragraph.add_run()

        if bold_text:
            run.text = bold_text
            run.font.bold = True
        elif italic_text:
            run.text = italic_text
            run.font.italic = True
        else:
            # Markers with empty content (e.g., `**` in "a ** b"): there is nothing to
            # format, so keep the matched characters verbatim rather than losing them
            run.text = match.group(0)

        last_index = end  # Update position

    # Add any remaining unformatted text
    if last_index < len(text):
        run = frame_paragraph.add_run()
        run.text = text[last_index:]
139
+
140
+
141
  def generate_powerpoint_presentation(
142
  parsed_data: dict,
143
  slides_template: str,
 
336
  # The bullet_points may contain a nested hierarchy of JSON arrays
337
  # In some scenarios, it may contain objects (dictionaries) because the LLM generated so
338
  # ^ The second scenario is not covered
 
339
  flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
340
+ add_bulleted_items(text_frame, flat_items_list)
 
 
 
 
 
 
 
341
 
342
  _handle_key_message(
343
  the_slide=slide,
 
393
  text_col: SlidePlaceholder = slide.shapes.placeholders[idx]
394
 
395
  flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
396
+ add_bulleted_items(text_col.text_frame, flat_items_list)
 
 
 
 
 
 
 
397
 
398
  if not img_keywords:
399
  # No keywords, so no image search and addition
 
459
  title_shape.text = remove_slide_number_from_heading(slide_json['heading'])
460
 
461
  flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
462
+ add_bulleted_items(body_shape.text_frame, flat_items_list)
 
 
 
 
 
 
 
463
 
464
  if not img_keywords:
465
  # No keywords, so no image search and addition
 
571
 
572
  # Set the icon's background shape color
573
  shape.fill.fore_color.rgb = shape.line.color.rgb = random.choice(ICON_COLORS)
 
574
  # Add the icon image on top of the colored shape
575
  slide.shapes.add_picture(icon_path, left, top, height=ICON_SIZE)
576
 
 
583
  height=text_box_size
584
  )
585
  text_frame = text_box.text_frame
 
586
  text_frame.word_wrap = True
587
  text_frame.paragraphs[0].alignment = pptx.enum.text.PP_ALIGN.CENTER
588
+ format_text(text_frame.paragraphs[0], accompanying_text)
589
 
590
  # Center the text vertically
591
  text_frame.vertical_anchor = pptx.enum.text.MSO_ANCHOR.MIDDLE
 
718
  if not left_heading:
719
  left_col_frame.text = double_col_content[0]['heading']
720
 
721
+ add_bulleted_items(left_col_frame, flat_items_list)
 
 
 
 
 
 
722
 
723
  if 'heading' in double_col_content[1] and right_heading:
724
  right_heading.text = double_col_content[1]['heading']
 
730
  if not right_heading:
731
  right_col_frame.text = double_col_content[1]['heading']
732
 
733
+ add_bulleted_items(right_col_frame, flat_items_list)
 
 
 
 
 
 
734
 
735
  _handle_key_message(
736
  the_slide=slide,
 
813
 
814
  for step in steps:
815
  shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.CHEVRON, left, top, width, height)
816
+ text_frame = shape.text_frame
817
+ text_frame.clear()
818
+ paragraph = text_frame.paragraphs[0]
819
+ paragraph.alignment = pptx.enum.text.PP_ALIGN.LEFT
820
+ format_text(paragraph, step.removeprefix(STEP_BY_STEP_PROCESS_MARKER))
821
  left += width - INCHES_0_4
822
  elif 4 < n_steps <= 6:
823
  # Vertical display
 
842
 
843
  for step in steps:
844
  shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.PENTAGON, left, top, width, height)
845
+ text_frame = shape.text_frame
846
+ text_frame.clear()
847
+ paragraph = text_frame.paragraphs[0]
848
+ paragraph.alignment = pptx.enum.text.PP_ALIGN.LEFT
849
+ format_text(paragraph, step.removeprefix(STEP_BY_STEP_PROCESS_MARKER))
850
  top += height + INCHES_0_3
851
  left += INCHES_0_5
852
 
 
880
  width=width,
881
  height=height
882
  )
883
+ format_text(shape.text_frame.paragraphs[0], slide_json['key_message'])
884
 
885
 
886
  def _get_slide_width_height_inches(presentation: pptx.Presentation) -> Tuple[float, float]:
 
893
 
894
  slide_width_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_width
895
  slide_height_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_height
 
896
 
897
  return slide_width_inch, slide_height_inch
898
 
 
905
  {
906
  "heading": "Introduction to AI Applications",
907
  "bullet_points": [
908
+ "Artificial Intelligence (AI) is *transforming* various industries",
909
  "AI applications range from simple decision-making tools to complex systems",
910
  "AI can be categorized into types: Rule-based, Instance-based, and Model-based"
911
  ],
 
915
  {
916
  "heading": "AI in Everyday Life",
917
  "bullet_points": [
918
+ "**Virtual assistants** like Siri, Alexa, and Google Assistant",
919
+ "**Recommender systems** in Netflix, Amazon, and Spotify",
920
+ "**Fraud detection** in banking and *credit card* transactions"
921
  ],
922
  "key_message": "AI is integrated into our daily lives through various services",
923
  "img_keywords": "virtual assistants, recommender systems, fraud detection"
 
967
  {
968
  "heading": "Step-by-Step: AI Development Process",
969
  "bullet_points": [
970
+ ">> **Step 1:** Define the problem and objectives",
971
+ ">> **Step 2:** Collect and preprocess data",
972
+ ">> **Step 3:** Select and train the AI model",
973
+ ">> **Step 4:** Evaluate and optimize the model",
974
+ ">> **Step 5:** Deploy and monitor the AI system"
975
  ],
976
  "key_message": "Developing AI involves a structured process from problem definition to deployment",
977
  "img_keywords": ""
 
979
  {
980
  "heading": "AI Icons: Key Aspects",
981
  "bullet_points": [
982
+ "[[brain]] Human-like *intelligence* and decision-making",
983
+ "[[robot]] Automation and physical *tasks*",
984
  "[[]] Data processing and cloud computing",
985
+ "[[lightbulb]] Insights and *predictions*",
986
+ "[[globe2]] Global connectivity and *impact*"
987
  ],
988
  "key_message": "AI encompasses various aspects, from human-like intelligence to global impact",
989
  "img_keywords": "AI aspects, intelligence, automation, data processing, global impact"
 
996
  "Invest in AI education and workforce development",
997
  "Call to action: Explore AI applications and contribute to shaping its future"
998
  ],
999
+ "key_message": "AI offers *immense potential*, and we must embrace it responsibly",
1000
  "img_keywords": "AI transformation, ethical considerations, AI education, future of AI"
1001
  }
1002
  ]