alexkueck committed on
Commit
d89974b
·
verified ·
1 Parent(s): bb806af

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +2 -4
utils.py CHANGED
@@ -57,7 +57,6 @@ from chromadb.errors import InvalidDimensionException
57
  import fitz # PyMuPDF
58
  import docx
59
  from langchain.document_loaders import DirectoryLoader
60
- from langchain.document_loaders.pydantic import Document
61
  #import io
62
  #from PIL import Image, ImageDraw, ImageOps, ImageFont
63
  #import base64
@@ -256,7 +255,7 @@ def load_pdf_with_metadata(file_path):
256
  "page": page_num + 1,
257
  "path": file_path
258
  }
259
- documents.append(Document(content=content, metadata=metadata))
260
  return documents
261
 
262
  def load_word_with_metadata(file_path):
@@ -268,8 +267,7 @@ def load_word_with_metadata(file_path):
268
  contents = []
269
  for para in document.paragraphs:
270
  content = para.text
271
- # Hier wird keine Seitenzahl verwendet, aber Sie können zusätzliche Logik hinzufügen
272
- contents.append(Document(content=content, metadata={**metadata, "page": 1}))
273
  return contents
274
 
275
 
 
57
  import fitz # PyMuPDF
58
  import docx
59
  from langchain.document_loaders import DirectoryLoader
 
60
  #import io
61
  #from PIL import Image, ImageDraw, ImageOps, ImageFont
62
  #import base64
 
255
  "page": page_num + 1,
256
  "path": file_path
257
  }
258
+ documents.append({"content": content, "metadata": metadata})
259
  return documents
260
 
261
  def load_word_with_metadata(file_path):
 
267
  contents = []
268
  for para in document.paragraphs:
269
  content = para.text
270
+ contents.append({"content": content, "metadata": {**metadata, "page": 1}})
 
271
  return contents
272
 
273