Spaces:
Runtime error
Runtime error
Rename and update db.py
Browse files- setup_db.py → db.py +13 -8
setup_db.py → db.py
RENAMED
@@ -6,7 +6,6 @@ from langchain_chroma import Chroma
|
|
6 |
|
7 |
|
8 |
file_path="./paul_graham_essays.csv"
|
9 |
-
model_path = 'intfloat/multilingual-e5-large-instruct'
|
10 |
db_persist_directory = './docs/chroma/'
|
11 |
|
12 |
|
@@ -26,14 +25,14 @@ def load_data():
|
|
26 |
return data[1:]
|
27 |
|
28 |
|
29 |
-
def
|
30 |
splitter = RecursiveCharacterTextSplitter(
|
31 |
-
chunk_size=
|
32 |
-
chunk_overlap=
|
33 |
separators=['\n\n', '\n', '(?<=\. )', ' ', '']
|
34 |
)
|
35 |
|
36 |
-
return splitter.split_documents(
|
37 |
|
38 |
|
39 |
def generate_embeddings():
|
@@ -49,8 +48,14 @@ def generate_embeddings():
|
|
49 |
)
|
50 |
|
51 |
|
52 |
-
def get_db(
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
embedding = generate_embeddings()
|
56 |
|
@@ -58,4 +63,4 @@ def get_db():
|
|
58 |
documents=splits,
|
59 |
embedding=embedding,
|
60 |
persist_directory=persist_directory,
|
61 |
-
)
|
|
|
6 |
|
7 |
|
8 |
file_path="./paul_graham_essays.csv"
|
|
|
9 |
db_persist_directory = './docs/chroma/'
|
10 |
|
11 |
|
|
|
25 |
return data[1:]
|
26 |
|
27 |
|
28 |
+
def split_data(data, chunk_size, chunk_overlap):
|
29 |
splitter = RecursiveCharacterTextSplitter(
|
30 |
+
chunk_size=chunk_size,
|
31 |
+
chunk_overlap=chunk_overlap,
|
32 |
separators=['\n\n', '\n', '(?<=\. )', ' ', '']
|
33 |
)
|
34 |
|
35 |
+
return splitter.split_documents(data)
|
36 |
|
37 |
|
38 |
def generate_embeddings():
|
|
|
48 |
)
|
49 |
|
50 |
|
51 |
+
def get_db(
|
52 |
+
chunk_size=1000,
|
53 |
+
chunk_overlap=200,
|
54 |
+
model_path = 'intfloat/multilingual-e5-large-instruct',
|
55 |
+
):
|
56 |
+
data = load_data()
|
57 |
+
|
58 |
+
splits = split_data(data, chunk_size, chunk_overlap)
|
59 |
|
60 |
embedding = generate_embeddings()
|
61 |
|
|
|
63 |
documents=splits,
|
64 |
embedding=embedding,
|
65 |
persist_directory=persist_directory,
|
66 |
+
)
|