KingNish committed on
Commit
40ddcd4
·
verified ·
1 Parent(s): 930d181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -10
app.py CHANGED
@@ -3,7 +3,7 @@ from openpyxl import load_workbook
3
  from pptx import Presentation
4
  import gradio as gr
5
  import io
6
- import docx2python
7
  from huggingface_hub import InferenceClient
8
 
9
  # Initialize the Mistral chat model
@@ -52,16 +52,10 @@ def read_document(file):
52
 
53
  elif file_extension == 'doc' or file_extension == 'docx':
54
  try:
55
- doc_result = docx2python.convert(io.BytesIO(file_content))
56
  content = ''
57
- for page in doc_result:
58
- for paragraph in page:
59
- if isinstance(paragraph, str):
60
- content += paragraph + ' '
61
- elif isinstance(paragraph, list):
62
- for sub_paragraph in paragraph:
63
- if isinstance(sub_paragraph, str):
64
- content += sub_paragraph + ' '
65
  return content
66
  except Exception as e:
67
  return f"Error reading DOC/DOCX: {e}"
 
3
  from pptx import Presentation
4
  import gradio as gr
5
  import io
6
+ import docx2python
7
  from huggingface_hub import InferenceClient
8
 
9
  # Initialize the Mistral chat model
 
52
 
53
  elif file_extension == 'doc' or file_extension == 'docx':
54
  try:
55
+ doc_result = docx2python.Docx2Python(io.BytesIO(file_content)).process()
56
  content = ''
57
+ for paragraph in doc_result.text:
58
+ content += paragraph + ' '
 
 
 
 
 
 
59
  return content
60
  except Exception as e:
61
  return f"Error reading DOC/DOCX: {e}"