Spaces:

nklomp
/

rag

Runtime error

App Files Files Community

nklomp committed on Feb 29, 2024

Commit

9eaa8cc

verified ·

1 Parent(s): 4d2daf5

Create app.py

Browse files

Files changed (1) hide show

app.py +217 -0

app.py ADDED Viewed

	@@ -0,0 +1,217 @@

+import streamlit as st
+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
+from langchain_openai import OpenAIEmbeddings,ChatOpenAI
+from langchain_community.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from htmlTemplates import css, bot_template, user_template
+from langchain_community.llms import HuggingFaceHub
+#Llama2
+import torch
+import transformers
+from langchain_community.llms import HuggingFacePipeline
+from transformers import AutoTokenizer
+from torch import cuda, bfloat16
+import langchain
+# Set the module-level `verbose` attribute of langchain to False
+# (presumably to keep chain debug output out of the logs -- confirm).
+langchain.verbose = False
+def get_pdf_text(pdf_docs):
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+    return text
+def get_text_chunks(text):
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=1000, # the character length of the chunck
+        chunk_overlap=200, # the character length of the overlap between chuncks
+        length_function=len # the length function - in this case, character length (aka the python len() fn.)
+    )
+    chunks = text_splitter.split_text(text)
+    return chunks
+def get_vectorstore(text_chunks,selected_embedding):
+    if selected_embedding == 'OpenAI':
+        print('OpenAI embedding')
+        embeddings = OpenAIEmbeddings()
+    elif selected_embedding == 'Instructor-xl':
+        print('Instructor-xl embedding')
+        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
+    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+    vectorstore.save_local("faiss_index")
+    return vectorstore
+def load_vectorstore(text_chunks,selected_embedding):
+    if selected_embedding == 'OpenAI':
+        print('OpenAI embedding')
+        embeddings = OpenAIEmbeddings()
+    elif selected_embedding == 'Instructor-xl':
+        print('Instructor-xl embedding')
+    vectorstore = FAISS.load_local("faiss_index", embeddings)
+    return vectorstore
+def get_conversation_chain(vectorstore,selected_llm):
+    if selected_llm == 'OpenAI':
+        print('OpenAi LLM')
+        llm = ChatOpenAI()
+    elif selected_llm == 'Llama2':
+        print('Llama2 LLM')
+        model_id = 'meta-llama/Llama-2-7b-chat-hf'
+        hf_auth = hf_auth
+        model_config = transformers.AutoConfig.from_pretrained(
+            model_id,
+            token=hf_auth
+        )
+        device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
+        if('cuda' in device):
+            # set quantization configuration to load large model with less GPU memory
+            # this requires the `bitsandbytes` library
+            bnb_config = transformers.BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type='nf4',
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_compute_dtype=bfloat16
+            )
+            model = transformers.AutoModelForCausalLM.from_pretrained(
+                model_id,
+                trust_remote_code=True,
+                config=model_config,
+                quantization_config=bnb_config,
+                device_map='auto',
+                token=hf_auth
+            )
+        else:
+            model = transformers.AutoModelForCausalLM.from_pretrained(
+                model_id,
+                trust_remote_code=True,
+                config=model_config,
+                device_map='auto',
+                token=hf_auth
+            )
+        # enable evaluation mode to allow model inference
+        model.eval()
+        print(f"Model loaded on {device}")
+        tokenizer = transformers.AutoTokenizer.from_pretrained(
+            model_id,
+            token=hf_auth
+        )
+        pipeline = transformers.pipeline(
+            torch_dtype=torch.float32,
+            model=model,
+            tokenizer=tokenizer,
+            return_full_text=True,  # langchain expects the full text
+            task='text-generation',
+            temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
+            max_new_tokens=512,  # max number of tokens to generate in the output
+            repetition_penalty=1.1  # without this output begins repeating
+        )
+        llm = HuggingFacePipeline(pipeline=pipeline)
+    # Generic LLM
+    memory = ConversationBufferMemory(
+    memory_key='chat_history', return_messages=True)
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=vectorstore.as_retriever(),
+        memory=memory,
+        return_source_documents=False
+    )
+    #print(conversation_chain)
+    return conversation_chain
+def handle_userinput(user_question):
+    print('Question: ' + user_question)
+    response = st.session_state.conversation({'question': user_question})
+    st.session_state.chat_history = response['chat_history']
+    for i, message in enumerate(st.session_state.chat_history):
+        if i % 2 == 0:
+            st.write(user_template.replace(
+                "{{MSG}}", message.content), unsafe_allow_html=True)
+        else:
+            st.write(bot_template.replace(
+                "{{MSG}}", message.content), unsafe_allow_html=True)
+def main():
+    load_dotenv()
+    st.set_page_config(page_title="VerAi",
+                       page_icon=":books:")
+    st.write(css, unsafe_allow_html=True)
+    if "conversation" not in st.session_state:
+        st.session_state.conversation = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = None
+    with st.sidebar:
+        st.subheader("Your documents")
+        pdf_docs = st.file_uploader(
+            "Upload your new PDFs here and click on 'Process' or load the last upload by clicking on 'Load'", accept_multiple_files=True)
+        selected_embedding = st.radio("Which Embedding?",["OpenAI", "Instructor-xl"])
+        selected_llm = st.radio("Which LLM?",["OpenAI", "Llama2"])
+        if st.button("Process"):
+            with st.spinner("Processing"):
+                # get pdf text
+                raw_text = get_pdf_text(pdf_docs)
+                # get the text chunks
+                text_chunks = get_text_chunks(raw_text)
+                # create vector store
+                vectorstore = get_vectorstore(text_chunks,selected_embedding)
+                # create conversation chain
+                st.session_state.conversation = get_conversation_chain(
+                    vectorstore,selected_llm)
+        if st.button("Load"):
+            with st.spinner("Processing"):
+                # load vector store
+                vectorstore = load_vectorstore(selected_embedding,selected_embedding)
+                # create conversation chain
+                st.session_state.conversation = get_conversation_chain(
+                    vectorstore,selected_llm)
+    if st.session_state.conversation:
+        st.header("VerAi :books:")
+        user_question = st.text_input("Stel een vraag hieronder")
+        # Vertel me iets over Wettelijke uren
+        # wat zijn Overige verloftypes bij kpn
+    if st.session_state.conversation and user_question:
+        handle_userinput(user_question)
+if __name__ == '__main__':
+    main()