Spaces:
Runtime error
Runtime error
new code
Browse files
- src/pdfchatbot.py +9 -3
src/pdfchatbot.py
CHANGED
@@ -11,7 +11,7 @@ from langchain.document_loaders import PyPDFLoader
|
|
11 |
from langchain.prompts import PromptTemplate
|
12 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
13 |
import spaces
|
14 |
-
from langchain_text_splitters import CharacterTextSplitter
|
15 |
|
16 |
|
17 |
class PDFChatBot:
|
@@ -96,7 +96,12 @@ class PDFChatBot:
|
|
96 |
"""
|
97 |
Load the vector database from the documents and embeddings.
|
98 |
"""
|
99 |
-
text_splitter =
|
|
|
|
|
|
|
|
|
|
|
100 |
docs = text_splitter.split_documents(self.documents)
|
101 |
self.vectordb = Chroma.from_documents(docs, self.embeddings)
|
102 |
|
@@ -132,7 +137,8 @@ class PDFChatBot:
|
|
132 |
def create_organic_pipeline(self):
|
133 |
self.pipeline = pipeline(
|
134 |
"text-generation",
|
135 |
-
model=self.
|
|
|
136 |
model_kwargs={"torch_dtype": torch.bfloat16},
|
137 |
device="cuda",
|
138 |
)
|
|
|
11 |
from langchain.prompts import PromptTemplate
|
12 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
13 |
import spaces
|
14 |
+
from langchain_text_splitters import CharacterTextSplitter,RecursiveCharacterTextSplitter
|
15 |
|
16 |
|
17 |
class PDFChatBot:
|
|
|
96 |
"""
|
97 |
Load the vector database from the documents and embeddings.
|
98 |
"""
|
99 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
100 |
+
chunk_size=256,
|
101 |
+
chunk_overlap=100,
|
102 |
+
length_function=len,
|
103 |
+
add_start_index=True,
|
104 |
+
)
|
105 |
docs = text_splitter.split_documents(self.documents)
|
106 |
self.vectordb = Chroma.from_documents(docs, self.embeddings)
|
107 |
|
|
|
137 |
def create_organic_pipeline(self):
|
138 |
self.pipeline = pipeline(
|
139 |
"text-generation",
|
140 |
+
model=self.model,
|
141 |
+
tokenizer=self.tokenizer,
|
142 |
model_kwargs={"torch_dtype": torch.bfloat16},
|
143 |
device="cuda",
|
144 |
)
|