ravi259 commited on
Commit
ef5b171
·
1 Parent(s): 22cf799

with app and other files

Browse files
.gitattributes ADDED
File without changes
.github/workflows/sync_to_huggingface_space.yml CHANGED
@@ -17,4 +17,4 @@ jobs:
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HF_SPACES_TOKEN }}
20
- run: git push --force https://ravi259:[email protected]/spaces/ravi259/baserag_hf main
 
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HF_SPACES_TOKEN }}
20
+ run: git push https://ravi259:[email protected]/spaces/ravi259/baserag_hf main
.gitignore CHANGED
@@ -1,4 +1,4 @@
1
  data/
2
  vectorstore/
3
  .env
4
- .ipynb
 
1
  data/
2
  vectorstore/
3
  .env
4
+ *.ipynb
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.10
README.md CHANGED
@@ -1,2 +1,37 @@
1
- # baserag
2
- baserag
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLM Evaluation using Ragas and LangChain
2
+
3
+ Ragas is a framework that helps you evaluate enterprise Retrieval Augmented Generation (RAG) pipelines.
4
+ Ragas is easy to use because no additional data is required: the context used in the RAG pipeline, together with the questions and answers, is all that is needed to evaluate the RAG.
5
+
6
+ Ragas can provide the metrics below (see https://docs.ragas.io/en/latest/concepts/metrics/index.html):
7
+
8
+ * Faithfulness
9
+ * Answer relevancy
10
+ * Context recall
11
+ * Context precision
12
+ * Context relevancy
13
+ * Context entity recall
14
+
15
+ We will use the LangChain framework to implement the RAG pipeline, using the functions/chains provided within LangChain.
16
+
17
+ ## Purpose
18
+
19
+ Evaluation of a RAG approach using LangChain and OpenAI
20
+
21
+ ## Features
22
+
23
+
24
+ ## Usage
25
+
26
+ ## Sample Output
27
+
28
+
29
+ ## Future Enhancements
30
+
31
+ ## Contributing
32
+
33
+ Contributions are welcome! If you have any ideas, suggestions, or bug fixes, please submit a pull request or open an issue in the GitHub repository.
34
+
35
+ ## License
36
+
37
+ This project is licensed under the MIT License.
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import easyocr as ocr #OCR
2
+ import streamlit as st #Web App
3
+ from PIL import Image #Image Processing
4
+ import numpy as np #Image Processing
5
+
6
+ # To analyze the PDF layout and extract text
7
+ from pdfminer.high_level import extract_pages, extract_text
8
+ from pdfminer.layout import LTTextContainer, LTChar, LTRect, LTFigure
9
+ # To extract text from tables in PDF
10
+ import pdfplumber
11
+ # To extract the images from the PDFs
12
+ from PIL import Image
13
+ from pdf2image import convert_from_path
14
+
15
+ import streamlit as st
16
+ import pandas as pd
17
+
18
+ import gradio as gr
19
+ import time
20
+ from PyPDF2 import PdfReader
21
+ import easyocr as ocr #OCR
22
+ import streamlit as st #Web App
23
+ from PIL import Image #Image Processing
24
+ import numpy as np #Image Processing
25
+ # To read the PDF
26
+ import PyPDF2
27
+ # To analyze the PDF layout and extract text
28
+ from pdfminer.high_level import extract_pages, extract_text
29
+ from pdfminer.layout import LTTextContainer, LTChar, LTRect, LTFigure
30
+ # To extract text from tables in PDF
31
+ import pdfplumber
32
+ # To extract the images from the PDFs
33
+ from PIL import Image
34
+ from pdf2image import convert_from_path
35
+ # To perform OCR to extract text from images
36
+ import pytesseract
37
+ # To remove the additional created files
38
+ import os
39
+ import tiktoken
40
+ import streamlit as st
41
+ import pandas as pd
42
+ from io import StringIO
43
+ import time
44
+ import json
45
+ import openai
46
+
47
+
48
+ import requests
49
+ from langchain_community.document_loaders import TextLoader
50
+ from langchain.text_splitter import CharacterTextSplitter
51
+
52
+ #from langchain_community.embeddings import OpenAIEmbeddings
53
+ from langchain_openai import OpenAIEmbeddings
54
+ from langchain_community.vectorstores import FAISS
55
+ from dotenv import load_dotenv,find_dotenv
56
+
57
+ #from langchain_community.chat_models import ChatOpenAI
58
+ from langchain_openai import ChatOpenAI
59
+ from langchain.prompts import ChatPromptTemplate
60
+ from langchain.schema.runnable import RunnablePassthrough
61
+ from langchain.schema.output_parser import StrOutputParser
62
+ from langchain.memory import ConversationBufferMemory
63
+ from langchain.chains import ConversationChain
64
+
65
+ from datasets import Dataset
66
+
67
+ from ragas import evaluate
68
+ from ragas.metrics import (
69
+ faithfulness,
70
+ answer_relevancy,
71
+ context_recall,
72
+ context_precision,
73
+ )
74
+
75
+ import os
76
+ from dotenv import load_dotenv
77
+ from htmlTemplates import bot_template, user_template, css
78
+
79
+ load_dotenv()
80
+ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
81
+
82
def load_knowledgeBase():
    """Load the persisted FAISS index that backs the app.

    Returns:
        The FAISS vector store read from the ``njmvc_Index`` index in the
        hard-coded vector-store directory, using OpenAI embeddings built
        with ``OPENAI_API_KEY``.
    """
    index_dir = "../Ragas-LangChain-Evaluation/vectorstore/db_faiss/"
    # NOTE(review): allow_dangerous_deserialization=True is passed through to
    # FAISS.load_local; only point this at index files the project created.
    return FAISS.load_local(
        index_dir,
        OpenAIEmbeddings(api_key=OPENAI_API_KEY),
        allow_dangerous_deserialization=True,
        index_name="njmvc_Index",
    )
92
def load_prompt():
    """Build the chat prompt template used to generate a quiz question.

    Returns:
        A ``ChatPromptTemplate`` with ``{context}`` and ``{question}``
        placeholders.
    """
    template = """ You are helping students to pass NJMVC Knowledge Test. Provide a Single multiple choice question with 4 options to choose from.
Use the context to provide the question and answer choices.
context = {context}
question = {question}
if the answer is not in the pdf answer "i donot know what the hell you are asking about"
"""
    return ChatPromptTemplate.from_template(template)
101
+
102
+ #function to load the OPENAI LLM
103
def load_llm():
    """Create the OpenAI chat model used by the app.

    Returns:
        A ``ChatOpenAI`` instance for ``gpt-3.5-turbo`` with temperature 0,
        authenticated with ``OPENAI_API_KEY``.
    """
    return ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0,
        api_key=OPENAI_API_KEY,
    )
106
+
107
# Built once at import time: the FAISS index, the prompt template and the
# chat model. Importing this module therefore reads the index from disk.
knowledgeBase=load_knowledgeBase()
prompt = load_prompt()
llm=load_llm()
110
+
111
def get_conversation_chain(vectorstore, llm):
    """Build a LangChain ``ConversationChain`` around ``llm``.

    Args:
        vectorstore: Not used by this function; kept in the signature so
            existing call sites keep working.
        llm: Chat model the chain should talk to.

    Returns:
        A verbose ``ConversationChain`` backed by a fresh
        ``ConversationBufferMemory``.
    """
    # The previous version also built a second, never-used memory object and
    # re-assigned ``llm`` to itself; both were dead code and are dropped.
    return ConversationChain(
        llm=llm,
        verbose=True,
        memory=ConversationBufferMemory(),
    )
122
+
123
def format_docs(docs):
    """Return the ``page_content`` of every item in ``docs``, blank-line separated."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)
125
+
126
def get_pdf_text(pdf_files):
    """Concatenate the extracted text of every page of every PDF.

    Args:
        pdf_files: Iterable of paths or file objects accepted by ``PdfReader``.

    Returns:
        One string with the page texts in file order, then page order.
        An empty iterable yields ``""``.
    """
    # ``or ""`` keeps a page that yields no text from breaking the join.
    return "".join(
        page.extract_text() or ""
        for pdf_file in pdf_files
        for page in PdfReader(pdf_file).pages
    )
134
+
135
def get_chunk_text(text):
    """Split ``text`` into overlapping chunks on newline boundaries.

    Args:
        text: The string to split.

    Returns:
        The list of chunks produced by a ``CharacterTextSplitter`` configured
        with a chunk size of 1000 and an overlap of 200, measured with ``len``.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
144
+
145
def handle_user_input(question):
    """Send ``question`` to the stored conversation and render the history.

    The conversation callable is read from ``st.session_state.conversation``;
    the ``chat_history`` it returns is stored back on the session and every
    message is written with the user template (even positions) or the bot
    template (odd positions).

    Args:
        question: Text typed by the user.
    """
    from html import escape  # local import so the block carries its own dependency

    response = st.session_state.conversation({'question':question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The templates are rendered as raw HTML, so message text must be
        # escaped or user/model output could inject markup into the page.
        st.write(template.replace("{{MSG}}", escape(message.content)), unsafe_allow_html=True)
155
+
156
def main():
    """Render the Streamlit page and answer the entered topic with the RAG chain.

    Uses the module-level ``knowledgeBase``, ``prompt`` and ``llm`` objects.
    Nothing is generated until the user has typed a topic.
    """
    st.set_page_config(page_title='NJMVC Knowledge Test with RAGAS', page_icon=':cars:')
    st.write(css, unsafe_allow_html=True)

    # Make sure the session keys exist before anything reads them.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header('NJMVC Knowledge Test with RAGAS :cars:')
    question = st.text_input("Input the Topic you want to test your knowledge: ")
    if not question:
        return

    with st.spinner("Get ready..."):
        # Retrieve context for the topic the user actually typed. The earlier
        # code searched with a fixed query, so every topic received the same
        # context, and it re-loaded and re-embedded the index on each request.
        retriever = knowledgeBase.as_retriever()
        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )
        response = rag_chain.invoke(question)

    st.write(response)


if __name__ == '__main__':
    main()
htmlTemplates.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stylesheet injected into the page; the <style> element is now closed so the
# markup that follows it is not swallowed by an unterminated tag.
css = '''
<style>
.chat-message {
    padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
    background-color: #2b313e
}
.chat-message.bot {
    background-color: #475063
}
.chat-message .avatar {
    width: 20%;
}
.chat-message .avatar img {
    max-width: 78px;
    max-height: 78px;
    border-radius: 50%;
    object-fit: cover;
}
.chat-message .message {
    width: 80%;
    padding: 0 1.5rem;
    color: #fff;
}
</style>
'''

# HTML for one bot message; callers substitute the {{MSG}} placeholder.
bot_template = '''
<div class="chat-message bot">
    <div class="avatar">
        <img src="https://i.ibb.co/cN0nmSj/Screenshot-2023-05-28-at-02-37-21.png">
    </div>
    <div class="message">{{MSG}}</div>
</div>
'''

# HTML for one user message; callers substitute the {{MSG}} placeholder.
user_template = '''
<div class="chat-message user">
    <div class="avatar">
        <img src="https://i.ibb.co/rdZC7LZ/Photo-logo-1.png">
    </div>
    <div class="message">{{MSG}}</div>
</div>
'''
vector_loader.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain_community.vectorstores import FAISS
4
+ from langchain_openai import OpenAIEmbeddings
5
+ import PyPDF2
6
+ from PyPDF2 import PdfReader
7
+ import pdfplumber
8
+ from PIL import Image
9
+ import pytesseract
10
+ from pdf2image import convert_from_path
11
+
12
+ from pdfminer.high_level import extract_pages, extract_text
13
+ from pdfminer.layout import LTTextContainer, LTChar, LTRect, LTFigure
14
+
15
+ import os
16
+ from dotenv import load_dotenv
17
+
18
+ load_dotenv()
19
+ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
20
+
21
+ # Extracting tables from the page
22
def extract_table(pdf_path, page_num, table_num):
    """Return one table extracted from one page of a PDF.

    Args:
        pdf_path: Path of the PDF to open with pdfplumber.
        page_num: Zero-based index into the document's pages.
        table_num: Zero-based index into the tables extracted from that page.

    Returns:
        The selected entry of ``page.extract_tables()``.

    Raises:
        IndexError: If ``page_num`` or ``table_num`` is out of range.
    """
    # The context manager closes the file handle, which was previously leaked
    # on every call.
    with pdfplumber.open(pdf_path) as pdf:
        return pdf.pages[page_num].extract_tables()[table_num]
31
+
32
+ # Convert table into appropriate fromat
33
def table_converter(table):
    """Render a table (list of rows of cells) as pipe-delimited text.

    Each row becomes ``|cell|cell|...|``; rows are separated by a newline and
    there is no trailing newline. Line breaks inside a cell are replaced by a
    space, and a ``None`` cell is rendered as the text ``None``.

    Args:
        table: Iterable of rows, each an iterable of ``str`` or ``None``.

    Returns:
        The formatted string; an empty table gives ``""``.
    """
    rendered_rows = []
    for row in table:
        cells = ['None' if cell is None else cell.replace('\n', ' ') for cell in row]
        rendered_rows.append('|' + '|'.join(cells) + '|')
    return '\n'.join(rendered_rows)
45
+
46
+
47
+ # Create a function to check if the element is in any tables present in the page
48
def is_element_inside_any_table(element, page ,tables):
    """Tell whether ``element`` lies fully inside the bbox of any table.

    The element's vertical coordinates are flipped against ``page.bbox[3]``
    before being compared with each table's bbox.

    Args:
        element: Object with a 4-item ``bbox``.
        page: Object whose ``bbox[3]`` is used to flip the y axis.
        tables: Iterable of objects with a 4-item ``bbox``.

    Returns:
        ``True`` if at least one table contains the element, else ``False``.
    """
    left, low_y, right, high_y = element.bbox
    page_top = page.bbox[3]
    top = page_top - high_y
    bottom = page_top - low_y
    return any(
        tx0 <= left <= right <= tx1 and ty0 <= top <= bottom <= ty1
        for tx0, ty0, tx1, ty1 in (candidate.bbox for candidate in tables)
    )
58
+
59
+ # Function to find the table for a given element
60
def find_table_for_element(element, page ,tables):
    """Return the index of the first table whose bbox contains ``element``.

    The element's vertical coordinates are flipped against ``page.bbox[3]``
    before being compared with each table's bbox.

    Args:
        element: Object with a 4-item ``bbox``.
        page: Object whose ``bbox[3]`` is used to flip the y axis.
        tables: Iterable of objects with a 4-item ``bbox``.

    Returns:
        The zero-based position of the first containing table, or ``None``
        when no table contains the element.
    """
    left, low_y, right, high_y = element.bbox
    page_top = page.bbox[3]
    top = page_top - high_y
    bottom = page_top - low_y
    matches = (
        position
        for position, candidate in enumerate(tables)
        if candidate.bbox[0] <= left <= right <= candidate.bbox[2]
        and candidate.bbox[1] <= top <= bottom <= candidate.bbox[3]
    )
    return next(matches, None)
70
+
71
+
72
def text_extraction(element):
    """Return an element's text together with a placeholder format list.

    Font name and size are not collected: every ``LTChar`` found inside an
    ``LTTextContainer`` child contributes the same empty marker, so the
    de-duplicated format list is ``[""]`` when at least one character exists
    and ``[]`` otherwise.

    Args:
        element: Iterable layout element exposing ``get_text()``.

    Returns:
        A ``(text, formats)`` tuple.
    """
    content = element.get_text()
    has_characters = any(
        isinstance(glyph, LTChar)
        for line in element
        if isinstance(line, LTTextContainer)
        for glyph in line
    )
    formats = [""] if has_characters else []
    return (content, formats)
95
+
96
+
97
+ # Create a function to crop the image elements from PDFs
98
def crop_image(element, pageObj):
    """Crop ``pageObj`` to the element's box and write it to ``cropped_image.pdf``.

    Args:
        element: Layout element exposing ``x0``, ``y0``, ``x1`` and ``y1``.
        pageObj: PyPDF2 page whose ``mediabox`` is modified in place.
    """
    # NOTE(review): y1 goes into the "lower left" corner and y0 into the
    # "upper right" one, exactly as before -- confirm this ordering is intended.
    pageObj.mediabox.lower_left = (element.x0, element.y1)
    pageObj.mediabox.upper_right = (element.x1, element.y0)

    # A single-page PDF holding only the cropped page.
    writer = PyPDF2.PdfWriter()
    writer.add_page(pageObj)
    with open('cropped_image.pdf', 'wb') as output_handle:
        writer.write(output_handle)
110
+
111
+ # Create a function to convert the PDF to images
112
def convert_to_images(input_file):
    """Render the first page of ``input_file`` to ``PDF_image.png``.

    Args:
        input_file: Path of the PDF handed to ``convert_from_path``.
    """
    first_page = convert_from_path(input_file)[0]
    first_page.save('PDF_image.png', 'PNG')
117
+
118
+ # Create a function to read text from images
119
def image_to_text(image_path):
    """Run OCR on an image file and return the recognised text.

    Args:
        image_path: Path of the image to open.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Opening through a context manager releases the file handle, which the
    # previous version left open.
    with Image.open(image_path) as img:
        return pytesseract.image_to_string(img)
125
+
126
+
127
+
128
def read_file_get_prompts(file_name):
    """Extract the text content of a PDF as a single string.

    Every page is walked from top to bottom. Elements that fall inside a
    detected table are replaced by that table's pipe-delimited rendering
    (emitted once per table); other text elements contribute their text.
    Figures are cropped, rendered and passed through OCR, but the OCR output
    is deliberately discarded and an empty string is recorded instead.

    Args:
        file_name: Path of the PDF to read, or ``None``.

    Returns:
        A string that starts with one space followed, for each page in order,
        by the page's plain text, its image texts, its table texts and its
        combined content. Returns ``True`` when ``file_name`` is ``None``.
    """
    # NOTE(review): the original indentation was lost, so it is an assumption
    # that the trailing ``return True`` belonged to the ``file_name is None``
    # path -- confirm against the real file.
    if file_name is None:
        return True

    pdf_reader = PyPDF2.PdfReader(file_name)
    text_per_page = {}

    # One pdfplumber handle for the whole document, closed on exit. It used to
    # be re-opened for every page and every table and never closed.
    with pdfplumber.open(file_name) as pdf:
        for pagenum, page in enumerate(extract_pages(file_name)):
            pageObj = pdf_reader.pages[pagenum]
            page_text = []
            text_from_images = []
            page_content = []

            plumber_page = pdf.pages[pagenum]
            tables = plumber_page.find_tables()
            text_from_tables = [
                table_converter(raw_table)
                for raw_table in plumber_page.extract_tables()
            ]
            # Index of the next table to emit, or -1 when the page has none.
            table_in_page = 0 if tables else -1

            # Highest y1 first, i.e. the order the elements appear on the page.
            elements = sorted(page._objs, key=lambda item: item.y1, reverse=True)

            for element in elements:
                if is_element_inside_any_table(element, page, tables):
                    table_found = find_table_for_element(element, page, tables)
                    if table_found is not None and table_found == table_in_page:
                        page_content.append(text_from_tables[table_in_page])
                        table_in_page += 1
                    # The element's text is already covered by the table.
                    continue

                if isinstance(element, LTTextContainer):
                    line_text, _ = text_extraction(element)
                    page_text.append(line_text)
                    page_content.append(line_text)

                if isinstance(element, LTFigure):
                    crop_image(element, pageObj)
                    convert_to_images('cropped_image.pdf')
                    image_to_text('PDF_image.png')
                    # The OCR text was blanked out on purpose ("removed to
                    # remove the errors with image"); record an empty string.
                    text_from_images.append("")
                    page_content.append("")

            dctkey = 'Page_' + str(pagenum)
            print(dctkey)
            text_per_page[dctkey] = [page_text, text_from_images, text_from_tables, page_content]

    # Pages were inserted in order, so a single pass over the dict replaces
    # the second full parse that was only used to count pages.
    return " " + "".join(
        "".join(map(str, section))
        for sections in text_per_page.values()
        for section in sections
    )
248
+
249
# Default directory the FAISS index is written to. Defined at module level so
# save_to_vector_store also works when this module is imported, not only when
# it is run as a script.
DB_FAISS_PATH = 'vectorstore/db_faiss'


def save_to_vector_store(text, db_path=None):
    """Split ``text`` into chunks, embed them and save a FAISS index.

    Args:
        text: A single string, or an iterable of strings, to index.
        db_path: Directory to save the index in; defaults to ``DB_FAISS_PATH``.
    """
    # create_documents expects a list of texts. Handing it a bare string made
    # it iterate character by character, producing one document per character.
    texts = [text] if isinstance(text, str) else list(text)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.create_documents(texts)
    vectorstore = FAISS.from_documents(documents=docs, embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY))
    vectorstore.save_local(db_path or DB_FAISS_PATH, index_name="njmvc_Index")


if __name__ == "__main__":
    # Build the index from the sample driver manual.
    file_name = "./data/drivermanual-2-very-small.pdf"
    text = read_file_get_prompts(file_name)
    save_to_vector_store(text)
264
+
265
+