Th3BossC's picture
initial commit
23d152f
raw
history blame
1.53 kB
import os
import pickle

import faiss
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

from chatBot.common.pdfToText import loadLatestPdf

# SECURITY: the OpenAI API key must never be written into source code.
# It is read from the process environment, so it has to be supplied by the
# deployment (shell export, container/Space secret, .env loader, ...).
# Any key that was previously committed in this file must be treated as
# compromised and revoked.
#
# The embedding and LLM objects created further down are constructed without
# an explicit key, so they depend on this variable; failing here gives a
# clear message instead of an error from deep inside the client library.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Provide the OpenAI API key through the "
        "environment (for example a deployment secret); do not hard-code it."
    )
# Web pages listed by URL.
# NOTE(review): nothing in this part of the file reads `urls`; it is kept
# because other modules may import it -- confirm before removing.
urls = [
    'http://en.espn.co.uk/f1/motorsport/story/3836.html',
    'https://www.mercedes-amg-hpp.com/formula-1-engine-facts/#',
    'https://www.redbullracing.com/int-en/five-things-about-yas-marina',
    'https://www.redbull.com/gb-en/history-of-formula-1',
    'https://www.formula1.com/en/information.abu-dhabi-yas-marina-circuit-yas-island.4YtOtpaWvaxWvDBTItP7s6.html',
]

# Load the source documents and split them on newlines into chunks of up to
# 1000 characters, with 200 characters of overlap between neighbouring chunks.
data = loadLatestPdf()
text_splitter = CharacterTextSplitter(
    separator='\n',
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(data)
if not docs:
    # Without at least one chunk there is nothing to embed or index; stop
    # here with an explicit message rather than failing inside the
    # embedding / index construction below.
    raise ValueError(
        "loadLatestPdf() produced no text chunks; cannot build the vector store."
    )

# Embed every chunk with OpenAI embeddings and index the vectors with FAISS.
embeddings = OpenAIEmbeddings()
vectorStore1_openAI = FAISS.from_documents(docs, embeddings)

# Persist a snapshot of the store next to the current working directory.
with open("faiss_store_openai.pkl", "wb") as f:
    pickle.dump(vectorStore1_openAI, f)

# The store that was just built is used directly. Reading the pickle back
# immediately after writing it would only yield a copy of the same object,
# while adding a disk read and a `pickle.load` (which executes whatever the
# file contains) on a path another process could have replaced.
VectorStore = vectorStore1_openAI

# Question-answering chain: retrieves relevant chunks from the vector store
# and has the LLM answer from them, reporting the sources it used.
llm = OpenAI(temperature=0.8, verbose=True)
gptModel = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=VectorStore.as_retriever(),
)