File size: 961 Bytes
5bcc73a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import streamlit as st
from transformers import (
PreTrainedTokenizerFast,
VisionEncoderDecoderModel,
ViTImageProcessor,
)
# Identifier handed to every `from_pretrained` call in this file and
# interpolated into the Streamlit spinner messages.
model_name = "grascii/gregg-vision-v0.2.1"
@st.cache_resource(show_spinner=f"Loading {model_name}")
def load_model():
    """Load the model, tokenizer and image processor for ``model_name``.

    Every component is fetched with ``from_pretrained`` using the
    ``HF_TOKEN`` entry from Streamlit secrets. The function is wrapped in
    ``st.cache_resource``, so Streamlit reuses the returned objects instead
    of reloading them on each call.

    Returns:
        A ``(model, tokenizer, processor)`` tuple, in that order.
    """
    hf_token = st.secrets.HF_TOKEN
    # Order matters: callers unpack the result positionally.
    component_classes = (
        VisionEncoderDecoderModel,
        PreTrainedTokenizerFast,
        ViTImageProcessor,
    )
    return tuple(
        component.from_pretrained(model_name, token=hf_token)
        for component in component_classes
    )
@st.cache_data(ttl=3600, show_spinner=f"Running {model_name}")
def run_vision(image):
    """Run the vision model on ``image`` and return the generated tokens.

    Results are memoized by ``st.cache_data`` with ``ttl=3600``.

    Args:
        image: Input handed straight to the image processor.
            NOTE(review): presumably a PIL image or array -- confirm
            against the caller.

    Returns:
        The tokens of the first generated sequence, with special tokens
        skipped.
    """
    vision_model, vocab, image_processor = load_model()

    # Preprocess into PyTorch tensors ("pt") and take the pixel tensor.
    encoded = image_processor(image, return_tensors="pt")
    sequences = vision_model.generate(encoded.pixel_values, max_new_tokens=12)

    # Only the first sequence in the generated output is decoded.
    first_sequence = sequences[0]
    return vocab.convert_ids_to_tokens(first_sequence, skip_special_tokens=True)
|