Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -74,7 +74,7 @@ import chromadb
|
|
74 |
|
75 |
|
76 |
|
77 |
-
|
78 |
def extract_text_from_pdf(pdf_path):
|
79 |
text = ""
|
80 |
reader = PdfReader(pdf_path)
|
@@ -82,10 +82,11 @@ def extract_text_from_pdf(pdf_path):
|
|
82 |
text += page.extract_text() + " " # Concatenate text from each page
|
83 |
return text
|
84 |
|
85 |
-
#
|
86 |
pdf_text = extract_text_from_pdf(pdf_path3)
|
87 |
|
88 |
|
|
|
89 |
@st.cache_resource
|
90 |
def load_vector_store(file_path, store_name, force_reload=False):
|
91 |
local_repo_path = "Private_Book"
|
@@ -522,10 +523,18 @@ def page3():
|
|
522 |
if not os.path.exists(pdf_path3):
|
523 |
st.error("File not found. Please check the file path.")
|
524 |
return
|
525 |
-
|
526 |
-
# Initialize Chroma client
|
527 |
chroma_client = chromadb.Client()
|
528 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
529 |
|
530 |
# Add the extracted text from PDF to the Chroma collection
|
531 |
collection.add(
|
|
|
74 |
|
75 |
|
76 |
|
77 |
+
@st.cache
|
78 |
def extract_text_from_pdf(pdf_path):
|
79 |
text = ""
|
80 |
reader = PdfReader(pdf_path)
|
|
|
82 |
text += page.extract_text() + " " # Concatenate text from each page
|
83 |
return text
|
84 |
|
85 |
+
# Use the function to get pdf_text
|
86 |
pdf_text = extract_text_from_pdf(pdf_path3)
|
87 |
|
88 |
|
89 |
+
|
90 |
@st.cache_resource
|
91 |
def load_vector_store(file_path, store_name, force_reload=False):
|
92 |
local_repo_path = "Private_Book"
|
|
|
523 |
if not os.path.exists(pdf_path3):
|
524 |
st.error("File not found. Please check the file path.")
|
525 |
return
|
526 |
+
|
527 |
+
# Initialize Chroma client
|
528 |
chroma_client = chromadb.Client()
|
529 |
+
|
530 |
+
# Check if the collection already exists
|
531 |
+
if "Kosten_Strukturdaten0602204" not in chroma_client.list_collections():
|
532 |
+
# Create the collection if it does not exist
|
533 |
+
collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
|
534 |
+
else:
|
535 |
+
# Use the existing collection if it already exists
|
536 |
+
collection = chroma_client.get_collection(name="Kosten_Strukturdaten0602204")
|
537 |
+
|
538 |
|
539 |
# Add the extracted text from PDF to the Chroma collection
|
540 |
collection.add(
|