import streamlit as st
import torch
from transformers import AutoModel, AutoTokenizer

# Title for your app
st.title("Llama-3-8B-Physics Master - Model Inference")

# Load the model and tokenizer from Hugging Face.
# NOTE: this repository distributes GGUF weights; a plain from_pretrained call
# will fail on a GGUF-only repo unless you also pass the gguf_file argument
# (see the text-generation sketch below).
@st.cache_resource
def load_model():
    model = AutoModel.from_pretrained("gallen881/Llama-3-8B-Physics_Master-GGUF")
    tokenizer = AutoTokenizer.from_pretrained("gallen881/Llama-3-8B-Physics_Master-GGUF")
    model.eval()  # inference only; disables dropout
    return model, tokenizer

# Load the model once and keep it in Streamlit's resource cache
model, tokenizer = load_model()

# Text input for the user
user_input = st.text_area("Enter your input here:")

if st.button("Generate Output"):
    if user_input.strip():
        # Tokenize the input
        inputs = tokenizer(user_input, return_tensors="pt")

        # Forward pass through the model without tracking gradients
        with torch.no_grad():
            outputs = model(**inputs)

        # AutoModel exposes the base transformer, so the forward pass yields
        # hidden states rather than generated text; display the final layer.
        st.write("Model Output Embeddings:", outputs.last_hidden_state)
    else:
        st.write("Please enter some input.")
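
# --- Text-generation variant (a minimal sketch, not the app above) ---------
# The app above displays raw hidden states, but a "Generate Output" button
# usually implies generated text. This sketch assumes transformers >= 4.41,
# which can load GGUF checkpoints when given a gguf_file argument (it also
# requires `pip install gguf` and dequantizes the weights on load, so an 8B
# model needs substantial RAM). GGUF_FILENAME below is a placeholder; check
# the repo's file list on the Hugging Face Hub and substitute the real name.
# Use this variant *instead of* the embedding display above, not alongside it.

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

REPO_ID = "gallen881/Llama-3-8B-Physics_Master-GGUF"
GGUF_FILENAME = "model.Q4_K_M.gguf"  # placeholder; replace with the actual file

st.title("Llama-3-8B-Physics Master - Text Generation")

@st.cache_resource
def load_generator():
    tokenizer = AutoTokenizer.from_pretrained(REPO_ID, gguf_file=GGUF_FILENAME)
    model = AutoModelForCausalLM.from_pretrained(REPO_ID, gguf_file=GGUF_FILENAME)
    model.eval()
    return model, tokenizer

model, tokenizer = load_generator()

prompt = st.text_area("Ask a physics question:")

if st.button("Generate Answer"):
    if prompt.strip():
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=256,                    # cap the response length
                do_sample=True,                        # sample instead of greedy decoding
                temperature=0.7,
                pad_token_id=tokenizer.eos_token_id,   # Llama tokenizers have no pad token
            )
        # Decode only the newly generated tokens, skipping the prompt
        answer = tokenizer.decode(
            output_ids[0][inputs["input_ids"].shape[1]:],
            skip_special_tokens=True,
        )
        st.write(answer)
    else:
        st.write("Please enter a question.")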