Spaces:
Sleeping
Sleeping
Doux Thibault
committed on
Commit
·
af73715
1
Parent(s):
7fb7516
add rag
Browse files
- Modules/rag.py +0 -20
Modules/rag.py
CHANGED
@@ -19,20 +19,7 @@ from langchain_mistralai import ChatMistralAI
|
|
19 |
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
|
20 |
from langchain_community.tools import DuckDuckGoSearchRun
|
21 |
|
22 |
-
# urls = [
|
23 |
-
# "https://www.toutelanutrition.com/wikifit/guide-nutrition/nutrition-sportive/apports-proteines",
|
24 |
|
25 |
-
# ]
|
26 |
-
|
27 |
-
# docs = [WebBaseLoader(url).load() for url in urls]
|
28 |
-
# docs_list = [item for sublist in docs for item in sublist]
|
29 |
-
|
30 |
-
# text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
31 |
-
# chunk_size=250, chunk_overlap=0
|
32 |
-
# )
|
33 |
-
# doc_splits = text_splitter.split_documents(docs_list)
|
34 |
-
|
35 |
-
####### PDF
|
36 |
def load_chunk_persist_pdf() -> Chroma:
|
37 |
pdf_folder_path = "data/pdf_folder/"
|
38 |
documents = []
|
@@ -52,13 +39,6 @@ def load_chunk_persist_pdf() -> Chroma:
|
|
52 |
vectorstore.persist()
|
53 |
return vectorstore
|
54 |
|
55 |
-
# from langchain_community.document_loaders import PyPDFLoader
|
56 |
-
# loader = PyPDFLoader('data/fitness_programs/ZeroToHero.pdf')
|
57 |
-
# pages = loader.load_and_split()
|
58 |
-
|
59 |
-
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
60 |
-
# splits = text_splitter.split_documents(pages)
|
61 |
-
# vectorstore = Chroma.from_documents(documents=splits, embedding=MistralAIEmbeddings())
|
62 |
vectorstore = load_chunk_persist_pdf()
|
63 |
retriever = vectorstore.as_retriever()
|
64 |
prompt = hub.pull("rlm/rag-prompt")
|
|
|
19 |
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
|
20 |
from langchain_community.tools import DuckDuckGoSearchRun
|
21 |
|
|
|
|
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
def load_chunk_persist_pdf() -> Chroma:
|
24 |
pdf_folder_path = "data/pdf_folder/"
|
25 |
documents = []
|
|
|
39 |
vectorstore.persist()
|
40 |
return vectorstore
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
vectorstore = load_chunk_persist_pdf()
|
43 |
retriever = vectorstore.as_retriever()
|
44 |
prompt = hub.pull("rlm/rag-prompt")
|