KingNish committed on
Commit
0259009
·
verified ·
1 Parent(s): 40ddcd4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -6
app.py CHANGED
@@ -3,7 +3,7 @@ from openpyxl import load_workbook
3
  from pptx import Presentation
4
  import gradio as gr
5
  import io
6
- import docx2python
7
  from huggingface_hub import InferenceClient
8
 
9
  # Initialize the Mistral chat model
@@ -52,11 +52,9 @@ def read_document(file):
52
 
53
  elif file_extension == 'doc' or file_extension == 'docx':
54
  try:
55
- doc_result = docx2python.Docx2Python(io.BytesIO(file_content)).process()
56
- content = ''
57
- for paragraph in doc_result.text:
58
- content += paragraph + ' '
59
- return content
60
  except Exception as e:
61
  return f"Error reading DOC/DOCX: {e}"
62
 
 
3
  from pptx import Presentation
4
  import gradio as gr
5
  import io
6
+ from docx2python import docx2python
7
  from huggingface_hub import InferenceClient
8
 
9
  # Initialize the Mistral chat model
 
52
 
53
  elif file_extension == 'doc' or file_extension == 'docx':
54
  try:
55
+ # extract docx content
56
+ with docx2python(io.BytesIO(file_content)) as content:
57
+ return content.text
 
 
58
  except Exception as e:
59
  return f"Error reading DOC/DOCX: {e}"
60