Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
|
|
2 |
from docx import Document
|
3 |
import os
|
4 |
|
|
|
|
|
|
|
5 |
def split_by_headers(file_path, headers_per_chunk=1):
|
6 |
doc = Document(file_path)
|
7 |
chunks = []
|
@@ -9,8 +12,8 @@ def split_by_headers(file_path, headers_per_chunk=1):
|
|
9 |
header_count = 0
|
10 |
|
11 |
for element in doc.element.body:
|
12 |
-
if element
|
13 |
-
paragraph = element
|
14 |
if any(style.val.startswith('Heading') for style in paragraph.xpath('.//w:pStyle')):
|
15 |
header_count += 1
|
16 |
if header_count > headers_per_chunk:
|
|
|
2 |
from docx import Document
|
3 |
import os
|
4 |
|
5 |
+
from docx import Document
|
6 |
+
from docx.oxml import CT_P
|
7 |
+
|
8 |
def split_by_headers(file_path, headers_per_chunk=1):
|
9 |
doc = Document(file_path)
|
10 |
chunks = []
|
|
|
12 |
header_count = 0
|
13 |
|
14 |
for element in doc.element.body:
|
15 |
+
if isinstance(element, CT_P):
|
16 |
+
paragraph = element
|
17 |
if any(style.val.startswith('Heading') for style in paragraph.xpath('.//w:pStyle')):
|
18 |
header_count += 1
|
19 |
if header_count > headers_per_chunk:
|