"""Gradio demo: face parsing (semantic segmentation) with a Segformer model."""

from typing import Optional

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import requests
import torch
from PIL import Image
from torch import nn
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor

# convenience expression for automatically determining device
device = (
    "cuda"  # Device for NVIDIA or AMD GPUs
    if torch.cuda.is_available()
    else "mps"  # Device for Apple Silicon (Metal Performance Shaders)
    if torch.backends.mps.is_available()
    else "cpu"
)

# Load models
image_processor = SegformerImageProcessor.from_pretrained("jonathandinu/face-parsing")
model = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
model.to(device)


def _build_palette(num_labels: int) -> np.ndarray:
    """Return a deterministic ``(num_labels, 3)`` uint8 RGB palette.

    A fixed seed keeps the class -> color mapping stable across runs.
    """
    rng = np.random.default_rng(0)
    return rng.integers(0, 256, size=(num_labels, 3), dtype=np.uint8)


# One RGB color per class predicted by the model.
PALETTE = _build_palette(model.config.num_labels)


def infer(image: Image.Image) -> np.ndarray:
    """Return the per-pixel class-index map predicted for *image*.

    Args:
        image: Input picture as a PIL image (any mode; converted to RGB).

    Returns:
        A 2-D integer array of shape ``(height, width)`` matching the input
        image size, where each entry is the predicted class index.
    """
    # Normalise to RGB so RGBA / grayscale / palette uploads reach the model
    # with three channels.
    rgb = image.convert("RGB")
    inputs = image_processor(images=rgb, return_tensors="pt").to(device)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        # logits shape: (batch_size, num_labels, ~height/4, ~width/4)
        logits = model(**inputs).logits

        # Resize output to match input image dimensions.
        upsampled_logits = nn.functional.interpolate(
            logits,
            size=rgb.size[::-1],  # PIL gives (W, H); interpolate wants (H, W)
            mode="bilinear",
            align_corners=False,
        )

    # Most likely class per pixel, for the single image in the batch.
    labels = upsampled_logits.argmax(dim=1)[0]

    # Move to CPU so the result can be handled as a NumPy array.
    return labels.cpu().numpy()


def colorize(labels: np.ndarray) -> np.ndarray:
    """Map a ``(H, W)`` class-index array to a ``(H, W, 3)`` uint8 RGB image."""
    return PALETTE[labels]


def segment(image: Optional[Image.Image]) -> Optional[np.ndarray]:
    """Gradio callback: run face parsing and return a color-coded mask.

    Raw class indices are small integers and would show up as an essentially
    black picture, so they are mapped through ``PALETTE`` for display.

    Returns ``None`` when no image is supplied (e.g. the input was cleared
    while the interface is in live mode).
    """
    if image is None:
        return None
    return colorize(infer(image))


# Create Gradio interface
iface = gr.Interface(
    fn=segment,  # the function to be used for inference
    # ``gr.inputs`` / ``gr.outputs`` were removed from Gradio; ``gr.Image``
    # serves as both the input and the output component.
    inputs=gr.Image(type="pil"),  # input type (image)
    outputs=gr.Image(type="numpy"),  # output type (image as numpy array)
    live=True,  # run inference live as the image is uploaded
    title="Face Parsing with Segformer",  # interface title
    description="Upload an image to perform face parsing using the Segformer model for semantic segmentation.",  # description
)

# Launch the interface (only when executed as a script, not on import)
if __name__ == "__main__":
    iface.launch()