import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation

# Load the model and feature extractor once at import time.
model = SegformerForSemanticSegmentation.from_pretrained("mattmdjaga/segformer_b2_clothes")
feature_extractor = SegformerFeatureExtractor.from_pretrained("mattmdjaga/segformer_b2_clothes")

# One RGB color per class. The original hard-coded 4 colors, but this
# checkpoint predicts model.config.num_labels (18) classes, so indexing the
# palette with a class id >= 4 raised IndexError. Keep the original first
# four colors and extend with deterministic pseudo-random colors.
_BASE_COLORS = np.array(
    [
        [0, 0, 0],      # class 0 - background (black)
        [255, 0, 0],    # class 1 - red
        [0, 255, 0],    # class 2 - green
        [0, 0, 255],    # class 3 - blue
    ],
    dtype=np.uint8,
)
_NUM_CLASSES = model.config.num_labels
if _NUM_CLASSES > len(_BASE_COLORS):
    _rng = np.random.default_rng(0)  # fixed seed: stable colors across runs
    _extra = _rng.integers(
        0, 256, size=(_NUM_CLASSES - len(_BASE_COLORS), 3), dtype=np.uint8
    )
    COLOR_MAP = np.vstack([_BASE_COLORS, _extra])
else:
    COLOR_MAP = _BASE_COLORS[:_NUM_CLASSES]


def predict(image):
    """Run clothes segmentation on ``image`` and return a color-coded mask.

    Args:
        image: Input image (PIL Image or HxWx3 ndarray, as delivered by the
            Gradio ``image`` component).

    Returns:
        PIL.Image.Image: RGB mask at the *input* resolution, with each pixel
        colored by its predicted class via ``COLOR_MAP``.
    """
    # Determine the original spatial size so the mask aligns with the input.
    if isinstance(image, Image.Image):
        target_size = image.size[::-1]  # PIL size is (W, H); we need (H, W)
    else:
        target_size = np.asarray(image).shape[:2]

    inputs = feature_extractor(images=image, return_tensors="pt")

    # Inference only: no autograd graph needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # SegFormer emits logits at 1/4 the input resolution; upsample first,
    # otherwise the returned mask is quarter-size and misaligned.
    logits = outputs.logits  # (1, num_classes, H/4, W/4)
    upsampled = torch.nn.functional.interpolate(
        logits, size=tuple(target_size), mode="bilinear", align_corners=False
    )

    # argmax over the class dimension; softmax is unnecessary since it is
    # monotonic and does not change the argmax.
    predicted_class = upsampled.argmax(dim=1).squeeze(0).cpu().numpy()

    mask_image = COLOR_MAP[predicted_class]  # map class ids -> RGB colors
    return Image.fromarray(mask_image.astype("uint8"))


def segmentation_interface(image):
    """Gradio callback: delegate to :func:`predict`."""
    return predict(image)


if __name__ == "__main__":
    # Launch only when run as a script, so importing this module is side-effect
    # free beyond model loading.
    gr.Interface(fn=segmentation_interface, inputs="image", outputs="image").launch()