lik07 committed on
Commit
4d7fa61
·
verified ·
1 Parent(s): d697844

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
2
  from docx import Document
3
  import os
4
 
 
 
 
5
  def split_by_headers(file_path, headers_per_chunk=1):
6
  doc = Document(file_path)
7
  chunks = []
@@ -9,8 +12,8 @@ def split_by_headers(file_path, headers_per_chunk=1):
9
  header_count = 0
10
 
11
  for element in doc.element.body:
12
- if element.tag.endswith('p'):
13
- paragraph = element._element
14
  if any(style.val.startswith('Heading') for style in paragraph.xpath('.//w:pStyle')):
15
  header_count += 1
16
  if header_count > headers_per_chunk:
 
2
  from docx import Document
3
  import os
4
 
5
+ from docx import Document
6
+ from docx.oxml import CT_P
7
+
8
  def split_by_headers(file_path, headers_per_chunk=1):
9
  doc = Document(file_path)
10
  chunks = []
 
12
  header_count = 0
13
 
14
  for element in doc.element.body:
15
+ if isinstance(element, CT_P):
16
+ paragraph = element
17
  if any(style.val.startswith('Heading') for style in paragraph.xpath('.//w:pStyle')):
18
  header_count += 1
19
  if header_count > headers_per_chunk: