llamm / app.py
import streamlit as st
# Load the quantized GGUF model directly with ctransformers
# (transformers' AutoModelForCausalLM does not accept the GGUF-specific
#  model_type / gpu_layers arguments used below)
from ctransformers import AutoModelForCausalLM
from huggingface_hub import login
import os

file = 'llama-2-7b.Q5_0.gguf'  # local quantized GGUF weights
NO_GPU = 0       # run entirely on CPU
GPU_LAYERS = 50  # layers to offload if a GPU were available

# Load the model from the local GGUF file on CPU
llm = AutoModelForCausalLM.from_pretrained(file, model_type="llama", gpu_layers=NO_GPU)
# model = AutoModelForCausalLM.from_pretrained("valencar/llamm",
# model_file=file, model_type="llama", gpu_layers=NO_GPU)
# access_token = os.getenv('HF_TOKEN2')
# login(token = access_token)
prompt = "AI is going to"
with st.container():
    st.write('\n\n')
    st.write(prompt)
    # Generate a completion for the prompt and display it
    answer = llm(prompt)
    st.write(answer)
    print(answer)
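
# --- Illustrative sketch (kept commented out, like the alternatives above) ---
# The ctransformers LLM object also accepts generation parameters and can
# stream tokens. This is a minimal sketch of streaming the same prompt into a
# Streamlit placeholder; the parameter values (max_new_tokens, temperature)
# are assumptions for illustration, not taken from the original app.
#
# placeholder = st.empty()
# streamed = ""
# for token in llm(prompt, max_new_tokens=128, temperature=0.7, stream=True):
#     streamed += token
#     placeholder.write(streamed)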