Anne31415 committed on
Commit
8805e7e
·
verified ·
1 Parent(s): 16f7be1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -5
app.py CHANGED
@@ -74,7 +74,7 @@ import chromadb
74
 
75
 
76
 
77
- # Function to extract text from a PDF file
78
  def extract_text_from_pdf(pdf_path):
79
  text = ""
80
  reader = PdfReader(pdf_path)
@@ -82,10 +82,11 @@ def extract_text_from_pdf(pdf_path):
82
  text += page.extract_text() + " " # Concatenate text from each page
83
  return text
84
 
85
- # Example usage
86
  pdf_text = extract_text_from_pdf(pdf_path3)
87
 
88
 
 
89
  @st.cache_resource
90
  def load_vector_store(file_path, store_name, force_reload=False):
91
  local_repo_path = "Private_Book"
@@ -522,10 +523,18 @@ def page3():
522
  if not os.path.exists(pdf_path3):
523
  st.error("File not found. Please check the file path.")
524
  return
525
-
526
- # Initialize Chroma client and collection
527
  chroma_client = chromadb.Client()
528
- collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
 
 
 
 
 
 
 
 
529
 
530
  # Add the extracted text from PDF to the Chroma collection
531
  collection.add(
 
74
 
75
 
76
 
77
+ @st.cache
78
  def extract_text_from_pdf(pdf_path):
79
  text = ""
80
  reader = PdfReader(pdf_path)
 
82
  text += page.extract_text() + " " # Concatenate text from each page
83
  return text
84
 
85
+ # Use the function to get pdf_text
86
  pdf_text = extract_text_from_pdf(pdf_path3)
87
 
88
 
89
+
90
  @st.cache_resource
91
  def load_vector_store(file_path, store_name, force_reload=False):
92
  local_repo_path = "Private_Book"
 
523
  if not os.path.exists(pdf_path3):
524
  st.error("File not found. Please check the file path.")
525
  return
526
+
527
+ # Initialize Chroma client
528
  chroma_client = chromadb.Client()
529
+
530
+ # Check if the collection already exists
531
+ if "Kosten_Strukturdaten0602204" not in chroma_client.list_collections():
532
+ # Create the collection if it does not exist
533
+ collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
534
+ else:
535
+ # Use the existing collection if it already exists
536
+ collection = chroma_client.get_collection(name="Kosten_Strukturdaten0602204")
537
+
538
 
539
  # Add the extracted text from PDF to the Chroma collection
540
  collection.add(