Pudding48 committed on
Commit
07b255e
·
verified ·
1 Parent(s): 5a05052

Update prepare_vector_dp.py

Browse files
Files changed (1) hide show
  1. prepare_vector_dp.py +19 -2
prepare_vector_dp.py CHANGED
@@ -1,10 +1,12 @@
1
  from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
2
  from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
3
  from langchain_community.vectorstores import FAISS
4
- from langchain_community.embeddings import GPT4AllEmbeddings
 
5
 
6
  from huggingface_hub import hf_hub_download
7
 
 
8
  # from llama_cpp import Llama
9
  import os
10
 
@@ -42,7 +44,22 @@ def create_db_from_text():
42
  chunks = text_splitter.split_text(raw_text)
43
 
44
  # Embedding
 
 
 
 
 
 
 
 
 
 
 
 
45
  embedding_model = GPT4AllEmbeddings(model_file= model_file)
 
 
 
46
 
47
  # Load the chunks into the FAISS vector DB
48
  db = FAISS.from_texts(texts=chunks, embedding=embedding_model)
@@ -59,7 +76,7 @@ def create_dp_from_files():
59
  text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
60
  chunks = text_splitter.split_documents(documents)
61
 
62
- embedding_model = GPT4AllEmbeddings(model_file = model_file)
63
  dp = FAISS.from_documents(chunks, embedding_model)
64
  dp.save_local(vector_dp_path)
65
  return dp
 
1
  from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
2
  from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
3
  from langchain_community.vectorstores import FAISS
4
+ #from langchain_community.embeddings import GPT4AllEmbeddings
5
+ from langchain_community.embeddings import HuggingFaceEmbeddings
6
 
7
  from huggingface_hub import hf_hub_download
8
 
9
+
10
  # from llama_cpp import Llama
11
  import os
12
 
 
44
  chunks = text_splitter.split_text(raw_text)
45
 
46
  # Embedding
47
+ '''
48
+ 🔥 The gpt4all embedding library you’re using was compiled against GLIBC 2.32 or higher,
49
+ but the Hugging Face Docker environment only provides GLIBC 2.31 or lower.
50
+
51
+ So your Space crashes because it tries to load a C-based .so library that depends on a newer system-level runtime.
52
+
53
+ 🧠 What is GLIBC?
54
+ GLIBC is the GNU C standard library — it’s a low-level part of Linux.
55
+ Most .so libraries (like libllmodel.so) built from C++ depend on a minimum GLIBC version.
56
+
57
+ You cannot change GLIBC in Hugging Face Docker — so if your library requires GLIBC 2.32+, it will not run.
58
+
59
  embedding_model = GPT4AllEmbeddings(model_file= model_file)
60
+ '''
61
+
62
+ embedding_model = HuggingFaceEmbeddings(model_file= model_file)
63
 
64
  # Load the chunks into the FAISS vector DB
65
  db = FAISS.from_texts(texts=chunks, embedding=embedding_model)
 
76
  text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
77
  chunks = text_splitter.split_documents(documents)
78
 
79
+ embedding_model = HuggingFaceEmbeddings(model_file = model_file)
80
  dp = FAISS.from_documents(chunks, embedding_model)
81
  dp.save_local(vector_dp_path)
82
  return dp