import streamlit as st

# Load the model directly. The GGUF file and the model_type/gpu_layers arguments
# below are the ctransformers API (not transformers), so import from ctransformers.
from ctransformers import AutoModelForCausalLM
from huggingface_hub import login
import os
file = 'llama-2-7b.Q5_0.gguf'
NO_GPU = 0
GPU_LAYERS = 50
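# gpu_layers tells ctransformers how many transformer layers to offload to the GPU:
# NO_GPU keeps inference entirely on the CPU; pass GPU_LAYERS instead when a GPU is available.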
llm = AutoModelForCausalLM.from_pretrained(file, model_type="llama", gpu_layers=NO_GPU)
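# Note: from_pretrained here points at a local GGUF file; ctransformers also accepts
# a Hub repo id plus model_file, as in the commented-out variant below.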
# model = AutoModelForCausalLM.from_pretrained("valencar/llamm",
#                                              model_file=file, model_type="llama", gpu_layers=NO_GPU)
# access_token = os.getenv('HF_TOKEN2')
# login(token=access_token)
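# The commented-out block above is an alternative path: authenticate with a Hugging Face
# token read from the HF_TOKEN2 environment variable, then pull the same GGUF weights
# from the valencar/llamm repo on the Hub instead of loading a local file.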
prompt = "AI is going to"

with st.container():
    st.write('\n\n')
    st.write(prompt)
    # Calling the ctransformers model directly returns the generated completion text
    answer = llm(prompt)
    st.write(answer)
    print(answer)
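# To try this locally (assuming the script is saved as app.py):
#   streamlit run app.py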