import streamlit as st

# Load the GGUF model with ctransformers; the transformers AutoModel API does
# not support GGUF files or the model_file/gpu_layers arguments used below.
from ctransformers import AutoModelForCausalLM

file = 'llama-2-7b.Q4_K_M.gguf'
NO_GPU = 0       # offload no layers: run entirely on the CPU
GPU_LAYERS = 50  # layers to offload to the GPU when one is available

llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-GGUF",
                                           model_file=file,
                                           model_type="llama",
                                           gpu_layers=NO_GPU)
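# Optional: Streamlit reruns this script on every interaction, which would
# reload the model each time. A minimal caching sketch using Streamlit's
# st.cache_resource decorator (the function name load_llm is illustrative):
#
#     @st.cache_resource
#     def load_llm():
#         return AutoModelForCausalLM.from_pretrained(
#             "TheBloke/Llama-2-7B-GGUF", model_file=file,
#             model_type="llama", gpu_layers=NO_GPU)
#
#     llm = load_llm()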
prompt = "AI is going to"

with st.container():
    st.write('\n\n')
    st.write(prompt)
    answer = llm(prompt)  # generate a completion for the prompt
    st.write(answer)
    print(answer)
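# Optional: a sketch of tuned generation, assuming ctransformers' documented
# sampling parameters (max_new_tokens, temperature, stream); the values used
# here are illustrative:
#
#     for token in llm(prompt, max_new_tokens=128, temperature=0.7, stream=True):
#         print(token, end="", flush=True)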