Spaces:

nightfury
/

ChromaDB

Runtime error

App Files Files Community

nightfury committed on Jan 27

Commit

f5a896d

verified ·

1 Parent(s): 0642011

Update appChatbot.py

Browse files

Files changed (1) hide show

appChatbot.py +42 -0

appChatbot.py CHANGED Viewed

@@ -16,6 +16,10 @@ from langchain.vectorstores import Chroma
 from langchain.document_loaders import PyPDFLoader
 from fastapi.encoders import jsonable_encoder
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
@@ -31,6 +35,44 @@ embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 ABS_PATH = os.path.dirname(os.path.abspath(__file__))
 DB_DIR = os.path.join(ABS_PATH, "db")
 vectorstore = None
 def replace_newlines_and_spaces(text):

 from langchain.document_loaders import PyPDFLoader
 from fastapi.encoders import jsonable_encoder
+from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain.vectorstores.faiss import FAISS
+from huggingface_hub import snapshot_download
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 ABS_PATH = os.path.dirname(os.path.abspath(__file__))
 DB_DIR = os.path.join(ABS_PATH, "db")
+# Download pre-computed book embeddings from the Hugging Face Hub, load them
+# into a FAISS vector store, and run a sample similarity search.
+# NOTE(review): BOOK is not defined in the visible part of this diff — confirm
+# it is assigned before this point.
+# Local cache folder for the download, named after the book. This previously
+# interpolated `book` (lowercase), which does not match the `BOOK` name used
+# everywhere else in this section.
+cache_dir=f"{BOOK}_cache"
+vectorstore = snapshot_download(repo_id="calmgoose/book-embeddings",
+                                repo_type="dataset",
+                                revision="main",
+                                allow_patterns=f"books/{BOOK}/*", # to download only the one book
+                                cache_dir=cache_dir,
+                                )
+# get path to the `vectorstore` folder that you just downloaded
+# we'll look inside the `cache_dir` for the folder we want
+target_dir = BOOK
+# Stays None if the walk below never finds the folder, so the failure can be
+# reported explicitly instead of surfacing later as a NameError.
+target_path = None
+# Walk through the directory tree recursively
+for root, dirs, files in os.walk(cache_dir):
+    # Check if the target directory is in the list of directories
+    if target_dir in dirs:
+        # Get the full path of the target directory
+        target_path = os.path.join(root, target_dir)
+if target_path is None:
+    raise FileNotFoundError(
+        f"Could not find a '{target_dir}' folder inside '{cache_dir}'"
+    )
+# load embeddings
+# this is what was used to create embeddings for the book
+embeddings = HuggingFaceInstructEmbeddings(
+    embed_instruction="Represent the book passage for retrieval: ",
+    query_instruction="Represent the question for retrieving supporting texts from the book passage: "
+    )
+# load vector store to use with langchain
+docsearch = FAISS.load_local(folder_path=target_path, embeddings=embeddings)
+# similarity search: print the 4 closest passages and the page each came from
+question = "Who is big brother?"
+search = docsearch.similarity_search(question, k=4)
+for item in search:
+    print(item.page_content)
+    print(f"From page: {item.metadata['page']}")
+    print("---")
 vectorstore = None
 def replace_newlines_and_spaces(text):