import streamlit as st
from langchain.llms import LlamaCpp
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain

# Streamlit page configuration
st.set_page_config(page_title="Simple AI Chatbot")
st.header("Simple AI Chatbot")


# Initialize the language model chain once and cache it across reruns
# (st.cache_resource replaces the deprecated st.experimental_singleton)
@st.cache_resource
def initialize_chain():
    n_gpu_layers = 20  # number of model layers to offload to the GPU
    n_batch = 1024     # tokens processed per batch

    llm = LlamaCpp(
        model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
        n_gpu_layers=n_gpu_layers,
        n_batch=n_batch,
        n_ctx=2048,     # context window size
        temperature=0,
        verbose=False,
        streaming=True,
    )

    # Set up memory for contextual conversation; ConversationChain's
    # default prompt expects the memory key "history"
    memory = ConversationBufferMemory(memory_key="history")

    # Initialize the conversational chain
    chat_chain = ConversationChain(llm=llm, memory=memory, verbose=False)
    return chat_chain


llm_chain = initialize_chain()

if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "Hello! How can I assist you today?"}
    ]

# Display conversation messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle user input
user_input = st.chat_input("Type your message...", key="user_input")

if user_input:
    # Append the user message to the conversation and render it immediately,
    # since the history loop above has already run for this script execution
    st.session_state.messages.append({"role": "user", "content": user_input})
    with st.chat_message("user"):
        st.markdown(user_input)

    # Get a response from the LLM
    response = llm_chain.run(user_input)

    # Append the LLM response to the conversation
    st.session_state.messages.append({"role": "assistant", "content": response})

    # Update the chat window with the assistant's response
    with st.chat_message("assistant"):
        st.markdown(response)
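
# Usage (a minimal sketch, assuming this script is saved as app.py and the
# GGUF model file exists at models/mistral-7b-instruct-v0.1.Q5_0.gguf):
#
#   pip install streamlit langchain llama-cpp-python
#   streamlit run app.py
#
# Streamlit then serves the chat UI at http://localhost:8501 by default.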