"""Streamlit demo: load a local GGUF Llama-2 model and complete a short prompt."""
import os

import streamlit as st
# Load model directly
from huggingface_hub import login
from transformers import AutoModel

file = 'llama-2-7b.Q4_K_M.gguf'
NO_GPU = 0       # 0 layers offloaded -> run entirely on CPU
GPU_LAYERS = 50  # alternative setting: offload 50 layers to the GPU

# Authenticate with the Hugging Face Hub BEFORE loading the model:
# gated repos (Llama-2 requires accepting a license) reject anonymous
# downloads, so logging in after from_pretrained() is too late.
access_token = os.getenv('HF_TOKEN2')
login(token=access_token)

# NOTE(review): the keyword arguments `model_file`, `model_type` and
# `gpu_layers` belong to the ctransformers API
# (`ctransformers.AutoModelForCausalLM.from_pretrained`), not to
# `transformers.AutoModel`, and the repo id must not be an empty string
# (the filename suggests e.g. "TheBloke/Llama-2-7B-GGUF"). Confirm which
# library is intended and supply the real repo id.
model = AutoModel.from_pretrained("", model_file=file, model_type="llama",
                                  gpu_layers=NO_GPU)

prompt = "AI is going to"

with st.container():
    st.write('\n\n')
    st.write(prompt)
    # Bug fix: the original called the undefined name `llm`; the loaded
    # model object is bound to `model`, so the script raised NameError.
    answer = model(prompt)
    st.write(answer)
    print(answer)