samim2024 committed
Commit 3c59ee7 · verified · 1 Parent(s): 85cdbf8

Delete rag.py

Files changed (1)
  1. rag.py (+0 -72)
rag.py DELETED
@@ -1,72 +0,0 @@
- from langchain.vectorstores import Chroma
- from langchain.chat_models import ChatOllama
- from langchain.embeddings import FastEmbedEmbeddings
- from langchain.schema.output_parser import StrOutputParser
- from langchain.document_loaders import PyPDFLoader
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.schema.runnable import RunnablePassthrough
- from langchain.prompts import PromptTemplate
- from langchain.vectorstores.utils import filter_complex_metadata
- # add new import
- from langchain_community.document_loaders.csv_loader import CSVLoader
-
- from sentence_transformers import SentenceTransformer
-
- from langchain_community.embeddings import HuggingFaceEmbeddings
- model_name = "sentence-transformers/all-mpnet-base-v2"
- embedding = HuggingFaceEmbeddings(
-     model_name=model_name,
- )
-
-
-
- class ChatPDF:
-     vector_store = None
-     retriever = None
-     chain = None
-
-     def __init__(self):
-         self.model = ChatOllama(model="mistral")
-         self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
-         self.prompt = PromptTemplate.from_template(
-             """
-             <s> [INST] You are an assistant for question-answering tasks. Use only the following pieces of retrieved context
-             to build an answer for the user. If you don't know the answer, just say that you don't know. Use three sentences
-             maximum and keep the answer concise. [/INST] </s>
-             [INST] Question: {question}
-             Context: {context}
-             Answer: [/INST]
-             """
-         )
-
-     def ingest(self, pdf_file_path: str):
-         docs = PyPDFLoader(file_path=pdf_file_path).load()
-
-
-         chunks = self.text_splitter.split_documents(docs)
-         chunks = filter_complex_metadata(chunks)
-
-         vector_store = Chroma.from_documents(documents=chunks, embedding=embedding)
-         self.retriever = vector_store.as_retriever(
-             search_type="similarity_score_threshold",
-             search_kwargs={
-                 "k": 3,
-                 "score_threshold": 0.5,
-             },
-         )
-
-         self.chain = ({"context": self.retriever, "question": RunnablePassthrough()}
-                       | self.prompt
-                       | self.model
-                       | StrOutputParser())
-
-     def ask(self, query: str):
-         if not self.chain:
-             return "Please, add a PDF document first."
-
-         return self.chain.invoke(query)
-
-     def clear(self):
-         self.vector_store = None
-         self.retriever = None
-         self.chain = None
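
For context, a minimal usage sketch of the ChatPDF class that this commit removes, valid only while rag.py was still present. It assumes a local Ollama server with the "mistral" model pulled, and "document.pdf" is a hypothetical placeholder path:

    # Hypothetical usage of the removed rag.py module (pre-deletion).
    from rag import ChatPDF

    assistant = ChatPDF()
    assistant.ingest("document.pdf")   # split the PDF, embed chunks, build the Chroma retriever and chain
    print(assistant.ask("What is this document about?"))
    assistant.clear()                  # drop the retriever and chain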