---
license: mit
language:
- en
---

[Trained](https://ahxxm.com/179.moew/) on images labeled by myself.

2-step inference:

```python
from pathlib import Path

from PIL import Image
import torch
import torch.nn as nn
import uform

# A Path (not a plain str) so that `.name` below yields the file name.
path = Path("image.jpg")

# Step 1: generate a 768-dimension multimodal embedding for the image.
uf_model = uform.get_model("unum-cloud/uform-vl-english")
with Image.open(path) as img:
    image_data = uf_model.preprocess_image(img)
# filename not necessarily useful, but encode anyway
text_data = uf_model.preprocess_text(path.name)
memb = uf_model.encode_multimodal(image=image_data, text=text_data)
# Keep the embedding as a torch.Tensor: the classifier's nn.Linear layers
# reject NumPy arrays.
memb = memb.detach()
assert memb.shape == (1, 768)


# Step 2: load the classifier, infer and apply Sigmoid.
class ImageBinaryClassifier(nn.Module):
    """MLP head mapping a 768-dim embedding to a single logit."""

    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(768, 2048),
            nn.ReLU(),
            nn.Linear(2048, 2048),
            nn.ReLU(),
            nn.Linear(2048, 2048),
            nn.ReLU(),
            nn.Linear(2048, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
        )

    def forward(self, x):
        """Return the raw (pre-sigmoid) logit for each row of `x`."""
        return self.layers(x)


saved_model = ImageBinaryClassifier()
# weights_only=True restricts unpickling to tensors/state-dict containers.
state_dict = torch.load(
    "model.pt",
    map_location=torch.device("cpu"),
    weights_only=True,
)
saved_model.load_state_dict(state_dict)
saved_model.eval()
saved_model = torch.compile(saved_model)

# No gradients are needed for inference.
with torch.no_grad():
    prob = float(torch.sigmoid(saved_model(memb)))
```