Update app.py
app.py CHANGED
@@ -8,6 +8,8 @@ from langchain.prompts import PromptTemplate
 from langchain.chains.question_answering import load_qa_chain
 from langchain.vectorstores import Chroma
 from langchain.retrievers import mmr_retriever
+from utills import load_txt_documents , split_docs, chroma_db,
+
 
 # Initialize variables and paths
 script_dir = os.path.dirname(os.path.abspath(__file__))
@@ -32,7 +34,7 @@ def load_embeddings():
 hf = load_embeddings()
 
 @st.cache_data
-def load_documents(data_path):
+def load_txt_documents(data_path):
     documents = []
     for filename in os.listdir(data_path):
         if filename.endswith('.txt'):
@@ -40,7 +42,7 @@ def load_documents(data_path):
             documents.extend(TextLoader(file_path).load())
     return documents
 
-documents = load_documents(data_path)
+documents = load_txt_documents(data_path)
 
 def split_docs(documents, chunk_size, overlap):
     # Your implementation here
@@ -48,25 +50,20 @@ def split_docs(documents, chunk_size, overlap):
 
 docs = split_docs(documents, 450, 20)
 
-@st.cache_resource
-def create_chroma_db(docs, hf):
-    return Chroma(docs, hf)
 
-chroma_db = create_chroma_db(docs, hf)
 
-
-
-
+chroma_db = chroma_db(docs, hf)
+
+
+retriever = retriever_from_chroma(chroma_db,"mmr",6)
 
-retriever = create_retriever(chroma_db)
 
-# Set up LlamaCpp model
 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 
 @st.cache_resource
-def load_llm():
+def load_llm(model_path):
     return LlamaCpp(
-        model_path=
+        model_path=model_path,
         n_gpu_layers=0,
         temperature=0.0,
         top_p=0.5,
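
The new import line expects a `utills` module (spelling as committed) providing `load_txt_documents`, `split_docs`, and `chroma_db`, while the retriever line calls `retriever_from_chroma`, which the import list does not yet include; the trailing comma also leaves the import a syntax error as committed. Below is a minimal sketch of what that module could look like, assuming the classic LangChain APIs (`RecursiveCharacterTextSplitter`, `Chroma.from_documents`, `as_retriever`); only the function names come from the diff, the bodies are guesses.

    # utills.py -- hypothetical sketch; only the names appear in the diff,
    # the bodies are assumptions based on classic LangChain APIs.
    import os

    from langchain.document_loaders import TextLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import Chroma


    def load_txt_documents(data_path):
        """Load every .txt file under data_path into LangChain documents."""
        documents = []
        for filename in os.listdir(data_path):
            if filename.endswith('.txt'):
                file_path = os.path.join(data_path, filename)
                documents.extend(TextLoader(file_path).load())
        return documents


    def split_docs(documents, chunk_size, overlap):
        """Split documents into overlapping chunks for embedding."""
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=overlap
        )
        return splitter.split_documents(documents)


    def chroma_db(docs, embeddings):
        """Build a Chroma vector store from the chunked documents."""
        # Chroma is built via the from_documents classmethod, not the
        # bare Chroma(docs, hf) call the removed create_chroma_db used.
        return Chroma.from_documents(documents=docs, embedding=embeddings)


    def retriever_from_chroma(db, search_type, k):
        """Wrap the store in a retriever, e.g. MMR with k results."""
        return db.as_retriever(search_type=search_type, search_kwargs={"k": k})

Note that `chroma_db = chroma_db(docs, hf)` in app.py rebinds the imported helper to its return value, so the function cannot be called a second time; giving the variable a different name (e.g. `db`) would avoid the shadowing.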
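
For context, a hedged sketch of how these pieces might be wired into a question-answering flow: it assumes the `utills` sketch above, a HuggingFace embedding model as a stand-in for `load_embeddings()`, and a placeholder local model path; the Streamlit cache decorators are omitted to keep it standalone, and nothing beyond the names and parameter values comes from the diff.

    # Hypothetical end-to-end wiring; the diff only shows load_llm's
    # signature, the imports, and the retriever call. Paths, the chain
    # type, and the query flow are assumptions.
    from langchain.callbacks.manager import CallbackManager
    from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
    from langchain.chains.question_answering import load_qa_chain
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.llms import LlamaCpp

    from utills import load_txt_documents, split_docs, chroma_db, retriever_from_chroma

    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

    def load_llm(model_path):
        # Parameter values mirror the diff; callback_manager wiring is assumed.
        return LlamaCpp(
            model_path=model_path,
            n_gpu_layers=0,
            temperature=0.0,
            top_p=0.5,
            callback_manager=callback_manager,
        )

    hf = HuggingFaceEmbeddings()                    # stand-in for load_embeddings()
    docs = split_docs(load_txt_documents("data"), 450, 20)
    db = chroma_db(docs, hf)                        # avoids rebinding the helper name
    retriever = retriever_from_chroma(db, "mmr", 6)

    llm = load_llm("models/model.gguf")             # placeholder model path
    chain = load_qa_chain(llm, chain_type="stuff")

    query = "What do the documents say about X?"
    context_docs = retriever.get_relevant_documents(query)
    print(chain.run(input_documents=context_docs, question=query))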