import streamlit as st

from langchain.llms import LlamaCpp
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
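
# A minimal local chatbot: Streamlit renders the chat UI while LangChain
# drives a llama.cpp-hosted Mistral 7B Instruct model with buffered memory.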

st.set_page_config(page_title="Simple AI Chatbot")
st.header("Simple AI Chatbot")


# st.experimental_singleton is deprecated; st.cache_resource is its
# replacement and keeps the model loaded once per server process.
@st.cache_resource
def initialize_chain():
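    """Load the local LLM and wrap it in a conversational chain (cached)."""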
    # Layers to offload to the GPU and tokens per batch; tune to your hardware.
    n_gpu_layers = 20
    n_batch = 1024

    llm = LlamaCpp(
        model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
        n_gpu_layers=n_gpu_layers,
        n_batch=n_batch,
        n_ctx=2048,      # context window size in tokens
        temperature=0,   # deterministic output
        verbose=False,
        streaming=True,
    )
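
    # ConversationChain's default prompt expects the transcript as a plain
    # string under the "history" key, so the ConversationBufferMemory
    # defaults (memory_key="history", return_messages=False) fit here.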
    memory = ConversationBufferMemory()

    chat_chain = ConversationChain(llm=llm, memory=memory, verbose=False)
    return chat_chain


llm_chain = initialize_chain()
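
# Streamlit reruns the whole script on every interaction, so the transcript
# is kept in st.session_state to survive reruns.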
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "Hello! How can I assist you today?"}]

# Replay the stored conversation on each rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
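
# st.chat_input returns the submitted text, or None if nothing was sent on
# this rerun.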
user_input = st.chat_input("Type your message...", key="user_input")
if user_input:
    st.session_state.messages.append({"role": "user", "content": user_input})

    # The replay loop above already ran, so show the new user message
    # explicitly before generating the reply.
    with st.chat_message("user"):
        st.markdown(user_input)

    # Memory injects the prior turns into the prompt, so only the new user
    # message is passed in.
    response = llm_chain.run(user_input)

    st.session_state.messages.append({"role": "assistant", "content": response})

    with st.chat_message("assistant"):
        st.markdown(response)
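
# To try it: `streamlit run app.py` (assuming this file is saved as app.py
# and the GGUF model exists at models/mistral-7b-instruct-v0.1.Q5_0.gguf).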