"""Build a FAISS index over PDF content and expose a retrieval QA chain.

Importing this module runs the whole pipeline at module level:

1. load documents with ``loadLatestPdf()``;
2. split them into overlapping chunks on newlines;
3. embed the chunks with ``OpenAIEmbeddings`` and index them in FAISS;
4. write the index to ``faiss_store_openai.pkl`` and read it back;
5. build ``gptModel``, a ``RetrievalQAWithSourcesChain`` over that index.

The OpenAI credential is read from the ``OPENAI_API_KEY`` environment
variable; it must never be committed to source control.
"""
import os
import pickle

import faiss
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

from chatBot.common.pdfToText import loadLatestPdf

# SECURITY: a secret key used to be hard-coded here. Treat that key as
# compromised (revoke/rotate it) and supply the replacement through the
# environment or a secrets manager instead.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before "
        "importing this module."
    )

# Reference URLs; not consumed anywhere in this module.
urls = [
    'http://en.espn.co.uk/f1/motorsport/story/3836.html',
    'https://www.mercedes-amg-hpp.com/formula-1-engine-facts/#',
    'https://www.redbullracing.com/int-en/five-things-about-yas-marina',
    'https://www.redbull.com/gb-en/history-of-formula-1',
    'https://www.formula1.com/en/information.abu-dhabi-yas-marina-circuit-yas-island.4YtOtpaWvaxWvDBTItP7s6.html',
]

# Source documents for the index.
data = loadLatestPdf()

# Split on newlines into ~1000-character chunks; the 200-character overlap
# keeps context that straddles a chunk boundary retrievable from either side.
text_splitter = CharacterTextSplitter(
    separator='\n',
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(data)

# Embed every chunk and build the in-memory FAISS index.
embeddings = OpenAIEmbeddings()
vectorStore1_openAI = FAISS.from_documents(docs, embeddings)

# Persist the index so it is available on disk after this module has run.
with open("faiss_store_openai.pkl", "wb") as f:
    pickle.dump(vectorStore1_openAI, f)

# NOTE(security): pickle.load executes arbitrary code from the file. This is
# acceptable only because the file was written a few lines above by this same
# process; never point this at a pickle obtained from an untrusted source.
with open("faiss_store_openai.pkl", "rb") as f:
    VectorStore = pickle.load(f)

# Question-answering chain that retrieves from the reloaded index.
llm = OpenAI(temperature=0.8, verbose=True)
gptModel = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=VectorStore.as_retriever(),
)