from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation from PIL import Image feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b3-finetuned-cityscapes-1024-1024") model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b3-finetuned-cityscapes-1024-1024") # url = "http://images.cocodataset.org/val2017/000000039769.jpg" # image = Image.open(requests.get(url, stream=True).raw) images = ['image1', 'image2', 'image3'] images = Image.open(images) inputs = feature_extractor(images=images, return_tensors="pt") outputs = model(**inputs) logits = outputs.logits # shape (batch_size, num_labels, height/4, width/4)