import torch import torch.nn.functional as F from transformers import AutoTokenizer, AutoModel, AutoImageProcessor import gradio as gr import spaces processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5") vision_model = AutoModel.from_pretrained("nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True) @spaces.GPU def ImgEmbed(image): print(image); inputs = processor(image, return_tensors="pt") img_emb = vision_model(**inputs).last_hidden_state img_embeddings = F.normalize(img_emb[:, 0], p=2, dim=1) return img_embeddings[0].tolist(); with gr.Blocks() as demo: img = gr.Image(); out = gr.Text(); btn = gr.Button("Get Embeddings") btn.click(ImgEmbed, [img], [out]) if __name__ == "__main__": demo.launch(show_api=True)