import streamlit as st
from llama_cpp import Llama

# Title for your app
st.title("Llama-3-8B-Physics Master - Chatbot")


@st.cache_resource
def load_model():
    """Download and load the GGUF physics-tuned Llama 3 model from the Hugging Face Hub.

    Returns:
        Llama: the loaded llama_cpp model handle.

    Decorated with ``st.cache_resource`` so the (large) model is downloaded
    and loaded only once per Streamlit server process, not on every script
    rerun triggered by widget interaction.
    """
    model = Llama.from_pretrained(
        repo_id="gallen881/Llama-3-8B-Physics_Master-GGUF",
        filename="unsloth.F16.gguf",  # or unsloth.Q4_K_M.gguf for a smaller file
    )
    return model


# Load the model once and store it in cache
model = load_model()

# Text input for the user
user_input = st.text_area("Enter your message here:")

if st.button("Generate Response"):
    if user_input:
        # Create chat completion with the model; a single-turn conversation
        # consisting only of the user's message.
        response = model.create_chat_completion(
            messages=[
                {
                    "role": "user",
                    "content": user_input,
                }
            ]
        )
        # Extract the assistant's text from the OpenAI-style response dict
        st.write("Model Response:", response['choices'][0]['message']['content'])
    else:
        st.write("Please enter a message.")