Spaces:
Sleeping
Sleeping
Update prepare_vector_dp.py
Browse files- prepare_vector_dp.py +19 -2
prepare_vector_dp.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
2 |
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
|
3 |
from langchain_community.vectorstores import FAISS
|
4 |
-
from langchain_community.embeddings import GPT4AllEmbeddings
|
|
|
5 |
|
6 |
from huggingface_hub import hf_hub_download
|
7 |
|
|
|
8 |
# from llama_cpp import Llama
|
9 |
import os
|
10 |
|
@@ -42,7 +44,22 @@ def create_db_from_text():
|
|
42 |
chunks = text_splitter.split_text(raw_text)
|
43 |
|
44 |
# Embedding
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
embedding_model = GPT4AllEmbeddings(model_file= model_file)
|
|
|
|
|
|
|
46 |
|
47 |
# Insert into the FAISS vector DB
|
48 |
db = FAISS.from_texts(texts=chunks, embedding=embedding_model)
|
@@ -59,7 +76,7 @@ def create_dp_from_files():
|
|
59 |
text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
|
60 |
chunks = text_splitter.split_documents(documents)
|
61 |
|
62 |
-
embedding_model =
|
63 |
dp = FAISS.from_documents(chunks, embedding_model)
|
64 |
dp.save_local(vector_dp_path)
|
65 |
return dp
|
|
|
1 |
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
2 |
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
|
3 |
from langchain_community.vectorstores import FAISS
|
4 |
+
#from langchain_community.embeddings import GPT4AllEmbeddings
|
5 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
6 |
|
7 |
from huggingface_hub import hf_hub_download
|
8 |
|
9 |
+
|
10 |
# from llama_cpp import Llama
|
11 |
import os
|
12 |
|
|
|
44 |
chunks = text_splitter.split_text(raw_text)
|
45 |
|
46 |
# Embedding
|
47 |
+
'''
|
48 |
+
🔥 The gpt4all embedding library you’re using was compiled against GLIBC 2.32 or higher,
|
49 |
+
but the Hugging Face Docker environment only provides GLIBC 2.31 or lower.
|
50 |
+
|
51 |
+
So your Space crashes because it tries to load a C-based .so library that depends on a newer system-level runtime.
|
52 |
+
|
53 |
+
🧠 What is GLIBC?
|
54 |
+
GLIBC is the GNU C standard library — it’s a low-level part of Linux.
|
55 |
+
Most .so libraries (like libllmodel.so) built from C++ depend on a minimum GLIBC version.
|
56 |
+
|
57 |
+
You cannot change GLIBC in Hugging Face Docker — so if your library requires GLIBC 2.32+, it will not run.
|
58 |
+
|
59 |
embedding_model = GPT4AllEmbeddings(model_file= model_file)
|
60 |
+
'''
|
61 |
+
|
62 |
+
embedding_model = HuggingFaceEmbeddings(model_file= model_file)
|
63 |
|
64 |
# Insert into the FAISS vector DB
|
65 |
db = FAISS.from_texts(texts=chunks, embedding=embedding_model)
|
|
|
76 |
text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
|
77 |
chunks = text_splitter.split_documents(documents)
|
78 |
|
79 |
+
embedding_model = HuggingFaceEmbeddings(model_file = model_file)
|
80 |
dp = FAISS.from_documents(chunks, embedding_model)
|
81 |
dp.save_local(vector_dp_path)
|
82 |
return dp
|