import streamlit as st
import torch
from transformers import AutoModel, AutoTokenizer

# Title for your app
st.title("Llama-3-8B-Physics Master - Model Inference")

# Load the model and tokenizer from Hugging Face.
# NOTE: this repository distributes GGUF weights; a plain from_pretrained call
# will fail on a GGUF-only repo unless you also pass the gguf_file argument
# (see the text-generation sketch below).
@st.cache_resource
def load_model():
    model = AutoModel.from_pretrained("gallen881/Llama-3-8B-Physics_Master-GGUF")
    tokenizer = AutoTokenizer.from_pretrained("gallen881/Llama-3-8B-Physics_Master-GGUF")
    model.eval()  # inference only; disables dropout
    return model, tokenizer

# Load the model once and keep it in Streamlit's resource cache
model, tokenizer = load_model()

# Text input for the user
user_input = st.text_area("Enter your input here:")

if st.button("Generate Output"):
    if user_input.strip():
        # Tokenize the input
        inputs = tokenizer(user_input, return_tensors="pt")

        # Forward pass through the model without tracking gradients
        with torch.no_grad():
            outputs = model(**inputs)

        # AutoModel exposes the base transformer, so the forward pass yields
        # hidden states rather than generated text; display the final layer.
        st.write("Model Output Embeddings:", outputs.last_hidden_state)
    else:
        st.write("Please enter some input.")
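
# --- Text-generation variant (a minimal sketch, not the app above) ---------
# The app above displays raw hidden states, but a "Generate Output" button
# usually implies generated text. This sketch assumes transformers >= 4.41,
# which can load GGUF checkpoints when given a gguf_file argument (it also
# requires `pip install gguf` and dequantizes the weights on load, so an 8B
# model needs substantial RAM). GGUF_FILENAME below is a placeholder; check
# the repo's file list on the Hugging Face Hub and substitute the real name.
# Use this variant *instead of* the embedding display above, not alongside it.

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

REPO_ID = "gallen881/Llama-3-8B-Physics_Master-GGUF"
GGUF_FILENAME = "model.Q4_K_M.gguf"  # placeholder; replace with the actual file

st.title("Llama-3-8B-Physics Master - Text Generation")

@st.cache_resource
def load_generator():
    tokenizer = AutoTokenizer.from_pretrained(REPO_ID, gguf_file=GGUF_FILENAME)
    model = AutoModelForCausalLM.from_pretrained(REPO_ID, gguf_file=GGUF_FILENAME)
    model.eval()
    return model, tokenizer

model, tokenizer = load_generator()

prompt = st.text_area("Ask a physics question:")

if st.button("Generate Answer"):
    if prompt.strip():
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=256,                    # cap the response length
                do_sample=True,                        # sample instead of greedy decoding
                temperature=0.7,
                pad_token_id=tokenizer.eos_token_id,   # Llama tokenizers have no pad token
            )
        # Decode only the newly generated tokens, skipping the prompt
        answer = tokenizer.decode(
            output_ids[0][inputs["input_ids"].shape[1]:],
            skip_special_tokens=True,
        )
        st.write(answer)
    else:
        st.write("Please enter a question.")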