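Update utils.py: remove the langchain Document import; PDF and Word loaders now append plain {"content", "metadata"} dicts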
Browse files
utils.py
CHANGED
@@ -57,7 +57,6 @@ from chromadb.errors import InvalidDimensionException
|
|
57 |
import fitz # PyMuPDF
|
58 |
import docx
|
59 |
from langchain.document_loaders import DirectoryLoader
|
60 |
-
from langchain.document_loaders.pydantic import Document
|
61 |
#import io
|
62 |
#from PIL import Image, ImageDraw, ImageOps, ImageFont
|
63 |
#import base64
|
@@ -256,7 +255,7 @@ def load_pdf_with_metadata(file_path):
|
|
256 |
"page": page_num + 1,
|
257 |
"path": file_path
|
258 |
}
|
259 |
-
documents.append(
|
260 |
return documents
|
261 |
|
262 |
def load_word_with_metadata(file_path):
|
@@ -268,8 +267,7 @@ def load_word_with_metadata(file_path):
|
|
268 |
contents = []
|
269 |
for para in document.paragraphs:
|
270 |
content = para.text
|
271 |
-
|
272 |
-
contents.append(Document(content=content, metadata={**metadata, "page": 1}))
|
273 |
return contents
|
274 |
|
275 |
|
|
|
57 |
import fitz # PyMuPDF
|
58 |
import docx
|
59 |
from langchain.document_loaders import DirectoryLoader
|
|
|
60 |
#import io
|
61 |
#from PIL import Image, ImageDraw, ImageOps, ImageFont
|
62 |
#import base64
|
|
|
255 |
"page": page_num + 1,
|
256 |
"path": file_path
|
257 |
}
|
258 |
+
documents.append({"content": content, "metadata": metadata})
|
259 |
return documents
|
260 |
|
261 |
def load_word_with_metadata(file_path):
|
|
|
267 |
contents = []
|
268 |
for para in document.paragraphs:
|
269 |
content = para.text
|
270 |
+
contents.append({"content": content, "metadata": {**metadata, "page": 1}})
|
|
|
271 |
return contents
|
272 |
|
273 |
|