import streamlit as st
from llama_cpp import Llama

# Title for your app
st.title("Llama-3-8B-Physics Master - Chatbot")


@st.cache_resource
def load_model():
    """Download and load the GGUF physics-tuned Llama 3 model from the Hugging Face Hub.

    Returns:
        Llama: the loaded llama_cpp model handle.

    Decorated with ``st.cache_resource`` so the (large) model is downloaded
    and loaded only once per Streamlit server process, not on every script
    rerun triggered by widget interaction.
    """
    model = Llama.from_pretrained(
        repo_id="gallen881/Llama-3-8B-Physics_Master-GGUF",
        filename="unsloth.F16.gguf",  # or unsloth.Q4_K_M.gguf for a smaller file
    )
    return model


# Load the model once and store it in cache
model = load_model()

# Text input for the user
user_input = st.text_area("Enter your message here:")

if st.button("Generate Response"):
    if user_input:
        # Create chat completion with the model; a single-turn conversation
        # consisting only of the user's message.
        response = model.create_chat_completion(
            messages=[
                {
                    "role": "user",
                    "content": user_input,
                }
            ]
        )
        # Extract the assistant's text from the OpenAI-style response dict
        st.write("Model Response:", response['choices'][0]['message']['content'])
    else:
        st.write("Please enter a message.")