Doux Thibault committed on
Commit
af73715
·
1 Parent(s): 7fb7516
Files changed (1) hide show
  1. Modules/rag.py +0 -20
Modules/rag.py CHANGED
@@ -19,20 +19,7 @@ from langchain_mistralai import ChatMistralAI
19
  from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
20
  from langchain_community.tools import DuckDuckGoSearchRun
21
 
22
- # urls = [
23
- # "https://www.toutelanutrition.com/wikifit/guide-nutrition/nutrition-sportive/apports-proteines",
24
 
25
- # ]
26
-
27
- # docs = [WebBaseLoader(url).load() for url in urls]
28
- # docs_list = [item for sublist in docs for item in sublist]
29
-
30
- # text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
31
- # chunk_size=250, chunk_overlap=0
32
- # )
33
- # doc_splits = text_splitter.split_documents(docs_list)
34
-
35
- ####### PDF
36
  def load_chunk_persist_pdf() -> Chroma:
37
  pdf_folder_path = "data/pdf_folder/"
38
  documents = []
@@ -52,13 +39,6 @@ def load_chunk_persist_pdf() -> Chroma:
52
  vectorstore.persist()
53
  return vectorstore
54
 
55
- # from langchain_community.document_loaders import PyPDFLoader
56
- # loader = PyPDFLoader('data/fitness_programs/ZeroToHero.pdf')
57
- # pages = loader.load_and_split()
58
-
59
- # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
60
- # splits = text_splitter.split_documents(pages)
61
- # vectorstore = Chroma.from_documents(documents=splits, embedding=MistralAIEmbeddings())
62
  vectorstore = load_chunk_persist_pdf()
63
  retriever = vectorstore.as_retriever()
64
  prompt = hub.pull("rlm/rag-prompt")
 
19
  from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
20
  from langchain_community.tools import DuckDuckGoSearchRun
21
 
 
 
22
 
 
 
 
 
 
 
 
 
 
 
 
23
  def load_chunk_persist_pdf() -> Chroma:
24
  pdf_folder_path = "data/pdf_folder/"
25
  documents = []
 
39
  vectorstore.persist()
40
  return vectorstore
41
 
 
 
 
 
 
 
 
42
  vectorstore = load_chunk_persist_pdf()
43
  retriever = vectorstore.as_retriever()
44
  prompt = hub.pull("rlm/rag-prompt")