Spaces:
Runtime error
Runtime error
| from langchain_community.document_loaders import DataFrameLoader | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.llms import HuggingFaceHub | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.chains.combine_documents import create_stuff_documents_chain | |
| from langchain.chains import create_retrieval_chain | |
| import os | |
| #from dotenv import load_dotenv | |
| #Load environmental variables from .env-file | |
| #load_dotenv() | |
| # Load documents to create a vectorstore later | |
def load_documents(
    df,
    *,
    page_content_column='speech_content',
    chunk_size=1024,
    chunk_overlap=32,
):
    """Load a DataFrame as documents and split them into chunks.

    The frame is read through ``DataFrameLoader`` with
    ``page_content_column`` as the page-content column, and the loaded
    documents are split by a ``RecursiveCharacterTextSplitter``.

    Args:
        df: DataFrame passed to ``DataFrameLoader`` as ``data_frame``.
        page_content_column: Column used as the page content. Defaults to
            ``'speech_content'``, the value that used to be hard-coded.
        chunk_size: ``chunk_size`` given to the splitter; lengths are
            measured with ``len``.
        chunk_overlap: ``chunk_overlap`` given to the splitter.

    Returns:
        The result of ``splitter.split_documents`` on the loaded documents.
    """
    # TODO: create one initial vector store from all the documents loaded
    # by this function.
    # NOTE: a CSVLoader with source_column="speech_content" on the
    # unprocessed CSV file was the earlier alternative to DataFrameLoader.
    loader = DataFrameLoader(
        data_frame=df,
        page_content_column=page_content_column,
    )
    data = loader.load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    return splitter.split_documents(documents=data)
def get_vectorstore(embeddings, folder_path, index_name):
    """Load a saved FAISS vector store from disk.

    Args:
        embeddings: Embedding object forwarded to ``FAISS.load_local``.
        folder_path: Folder forwarded to ``FAISS.load_local``.
        index_name: Index name forwarded to ``FAISS.load_local``.

    Returns:
        The object returned by ``FAISS.load_local``.
    """
    # The combined location is only printed; load_local itself receives the
    # folder and the index name as separate arguments.
    print(folder_path + "/" + index_name)

    # TODO: dynamically update and merge vector stores. For now the index is
    # always loaded; building it with FAISS.from_documents and saving it with
    # save_local when it does not exist yet is not implemented here.
    # NOTE(review): allow_dangerous_deserialization=True opts in to what the
    # flag name calls dangerous deserialization (presumably pickle-based --
    # confirm in the LangChain docs); only load index files from a trusted
    # source.
    load_kwargs = {
        "folder_path": folder_path,
        "index_name": index_name,
        "embeddings": embeddings,
        "allow_dangerous_deserialization": True,
    }
    return FAISS.load_local(**load_kwargs)
| # Apply RAG by providing the context and the question to the LLM using the predefined template | |
def RAG(llm, prompt, db, question):
    """Run a retrieval chain over ``db`` for ``question``.

    A stuff-documents chain is built from ``llm`` and ``prompt``, combined
    with the retriever obtained from ``db.as_retriever()``, and invoked with
    the question under the ``"input"`` key.

    Args:
        llm: Language model passed to ``create_stuff_documents_chain``.
        prompt: Prompt passed to ``create_stuff_documents_chain``.
        db: Vector store; only its ``as_retriever()`` method is used here.
        question: Value sent to the chain as ``"input"``.

    Returns:
        Whatever the retrieval chain's ``invoke`` call returns.
    """
    # Build the answer-generating chain first, then attach the retriever.
    answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
    pipeline = create_retrieval_chain(db.as_retriever(), answer_chain)
    return pipeline.invoke({"input": question})