Spaces:

Pudding48
/

TinyLLamaTest2

Sleeping

App Files Files Community

Pudding48 committed on Jul 6

Commit

07b255e

verified ·

1 Parent(s): 5a05052

Update prepare_vector_dp.py

Browse files

Files changed (1) hide show

prepare_vector_dp.py +19 -2

prepare_vector_dp.py CHANGED Viewed

@@ -1,10 +1,12 @@
 from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
 from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
 from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import GPT4AllEmbeddings
 from huggingface_hub import hf_hub_download
 # from llama_cpp import Llama
 import os
@@ -42,7 +44,22 @@ def create_db_from_text():
     chunks = text_splitter.split_text(raw_text)
     # Embedding
     embedding_model = GPT4AllEmbeddings(model_file= model_file)
     # Load the chunks into the FAISS vector DB
     db = FAISS.from_texts(texts=chunks, embedding=embedding_model)
@@ -59,7 +76,7 @@ def create_dp_from_files():
     text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
     chunks = text_splitter.split_documents(documents)
-    embedding_model = GPT4AllEmbeddings(model_file = model_file)
     dp = FAISS.from_documents(chunks, embedding_model)
     dp.save_local(vector_dp_path)
     return dp

 from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
 from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
 from langchain_community.vectorstores import FAISS
+#from langchain_community.embeddings import GPT4AllEmbeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings
 from huggingface_hub import hf_hub_download
 # from llama_cpp import Llama
 import os
     chunks = text_splitter.split_text(raw_text)
     # Embedding
+    '''
+    🔥 The gpt4all embedding library you’re using was compiled against GLIBC 2.32 or higher,
+    but the Hugging Face Docker environment only provides GLIBC 2.31 or lower.
+    So your Space crashes because it tries to load a C-based .so library that depends on a newer system-level runtime.
+    🧠 What is GLIBC?
+    GLIBC is the GNU C standard library — it’s a low-level part of Linux.
+    Most .so libraries (like libllmodel.so) built from C++ depend on a minimum GLIBC version.
+    You cannot change GLIBC in Hugging Face Docker — so if your library requires GLIBC 2.32+, it will not run.
     embedding_model = GPT4AllEmbeddings(model_file= model_file)
+    '''
+    embedding_model = HuggingFaceEmbeddings(model_file= model_file)
     # Load the chunks into the FAISS vector DB
     db = FAISS.from_texts(texts=chunks, embedding=embedding_model)
     text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
     chunks = text_splitter.split_documents(documents)
+    embedding_model = HuggingFaceEmbeddings(model_file = model_file)
     dp = FAISS.from_documents(chunks, embedding_model)
     dp.save_local(vector_dp_path)
     return dp