jayson1408 committed
Commit cce9ed4 · verified · 1 Parent(s): c1ee035

Upload 9 files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+demo.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,165 @@
---
language: en
library_name: transformers
tags:
- vision
- image-segmentation
- nvidia/mit-b5
- transformers.js
- onnx
datasets:
- celebamaskhq
---

# Face Parsing

![example image and output](demo.png)

[Semantic segmentation](https://huggingface.co/docs/transformers/tasks/semantic_segmentation) model fine-tuned from [nvidia/mit-b5](https://huggingface.co/nvidia/mit-b5) with [CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ) for face parsing. For additional options, see the Transformers [Segformer docs](https://huggingface.co/docs/transformers/model_doc/segformer).

> ONNX model for web inference contributed by [Xenova](https://huggingface.co/Xenova).

## Usage in Python

An exhaustive list of labels can be extracted from [config.json](https://huggingface.co/jonathandinu/face-parsing/blob/65972ac96180b397f86fda0980bbe68e6ee01b8f/config.json#L30).

| id  | label      | note              |
| :-: | :--------- | :---------------- |
| 0   | background |                   |
| 1   | skin       |                   |
| 2   | nose       |                   |
| 3   | eye_g      | eyeglasses        |
| 4   | l_eye      | left eye          |
| 5   | r_eye      | right eye         |
| 6   | l_brow     | left eyebrow      |
| 7   | r_brow     | right eyebrow     |
| 8   | l_ear      | left ear          |
| 9   | r_ear      | right ear         |
| 10  | mouth      | area between lips |
| 11  | u_lip      | upper lip         |
| 12  | l_lip      | lower lip         |
| 13  | hair       |                   |
| 14  | hat        |                   |
| 15  | ear_r      | earring           |
| 16  | neck_l     | necklace          |
| 17  | neck       |                   |
| 18  | cloth      | clothing          |
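
The same mapping ships with the checkpoint itself, so it never needs to be hard-coded; a minimal sketch of reading it from the hub (assumes only `transformers` is installed):

```python
# read the id -> label mapping straight from the hosted config
from transformers import AutoConfig

config = AutoConfig.from_pretrained("jonathandinu/face-parsing")
print(config.id2label)  # {0: 'background', 1: 'skin', 2: 'nose', ...}
```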

```python
import torch
from torch import nn
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

from PIL import Image
import matplotlib.pyplot as plt
import requests

# convenience expression for automatically determining device
device = (
    "cuda"
    # device for NVIDIA or AMD GPUs
    if torch.cuda.is_available()
    else "mps"
    # device for Apple Silicon (Metal Performance Shaders)
    if torch.backends.mps.is_available()
    else "cpu"
)

# load models
image_processor = SegformerImageProcessor.from_pretrained("jonathandinu/face-parsing")
model = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
model.to(device)

# expects a PIL.Image or torch.Tensor
url = "https://images.unsplash.com/photo-1539571696357-5a69c17a67c6"
image = Image.open(requests.get(url, stream=True).raw)

# run inference on image
inputs = image_processor(images=image, return_tensors="pt").to(device)
outputs = model(**inputs)
logits = outputs.logits  # shape (batch_size, num_labels, ~height/4, ~width/4)

# resize output to match input image dimensions
upsampled_logits = nn.functional.interpolate(
    logits,
    size=image.size[::-1],  # PIL size is (W, H); reversed to (H, W)
    mode="bilinear",
    align_corners=False,
)

# get label masks
labels = upsampled_logits.argmax(dim=1)[0]

# move to CPU to visualize in matplotlib
labels_viz = labels.cpu().numpy()
plt.imshow(labels_viz)
plt.show()
```
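
Individual classes can then be sliced out of the label map; a short sketch continuing the example above (the `skin` choice is purely for illustration):

```python
# isolate a single class from the label map computed above
skin_id = model.config.label2id["skin"]        # 1, per the table above
skin_mask = (labels == skin_id).cpu().numpy()  # boolean (H, W) array

plt.imshow(skin_mask, cmap="gray")
plt.show()
```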

## Usage in the browser (Transformers.js)

```js
import {
  pipeline,
  env,
} from "https://cdn.jsdelivr.net/npm/@xenova/[email protected]";

// important to prevent errors since the model files are likely remote on HF hub
env.allowLocalModels = false;

// instantiate image segmentation pipeline with pretrained face parsing model
const model = await pipeline("image-segmentation", "jonathandinu/face-parsing");

// async inference since it could take a few seconds
// (same example image as in the Python snippet above)
const url = "https://images.unsplash.com/photo-1539571696357-5a69c17a67c6";
const output = await model(url);

// each label is a separate mask object
// [
//   { score: null, label: 'background', mask: transformers.js RawImage { ... }}
//   { score: null, label: 'hair', mask: transformers.js RawImage { ... }}
//   ...
// ]
for (const m of output) {
  console.log(`Found ${m.label}`);
  m.mask.save(`${m.label}.png`);
}
```

### p5.js

Since [p5.js](https://p5js.org/) uses an animation loop abstraction, we need to take care when loading the model and making predictions.

```js
// ...

// asynchronously load transformers.js and instantiate model
async function preload() {
  // load transformers.js library with a dynamic import
  const { pipeline, env } = await import(
    "https://cdn.jsdelivr.net/npm/@xenova/[email protected]"
  );

  // important to prevent errors since the model files are remote on HF hub
  env.allowLocalModels = false;

  // instantiate image segmentation pipeline with pretrained face parsing model
  model = await pipeline("image-segmentation", "jonathandinu/face-parsing");

  print("face-parsing model loaded");
}
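
// a hypothetical helper (an assumption, not part of the original sketch):
// call it from setup() or an event handler once the model has loaded,
// rather than re-running inference on every draw() frame
async function segment(imgURL) {
  if (!model) return; // preload() may still be awaiting the pipeline
  const output = await model(imgURL);
  print(`found ${output.length} masks`);
}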

// ...
```

[full p5.js example](https://editor.p5js.org/jonathan.ai/sketches/wZn15Dvgh)

### Model Description

- **Developed by:** [Jonathan Dinu](https://twitter.com/jonathandinu)
- **Model type:** Transformer-based semantic segmentation image model
- **License:** non-commercial research and educational purposes
- **Resources for more information:** Transformers docs on [Segformer](https://huggingface.co/docs/transformers/model_doc/segformer) and the [original research paper](https://arxiv.org/abs/2105.15203)

## Limitations and Bias

### Bias

While the capabilities of computer vision models are impressive, they can also reinforce or exacerbate social biases. The [CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ) dataset used for fine-tuning is large but not necessarily perfectly diverse or representative. Also, its images are of... just celebrities.
config.json ADDED
@@ -0,0 +1,111 @@
{
  "_name_or_path": "jonathandinu/face-parsing",
  "architectures": [
    "SegformerForSemanticSegmentation"
  ],
  "attention_probs_dropout_prob": 0.0,
  "classifier_dropout_prob": 0.1,
  "decoder_hidden_size": 768,
  "depths": [
    3,
    6,
    40,
    3
  ],
  "downsampling_rates": [
    1,
    4,
    8,
    16
  ],
  "drop_path_rate": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_sizes": [
    64,
    128,
    320,
    512
  ],
  "id2label": {
    "0": "background",
    "1": "skin",
    "2": "nose",
    "3": "eye_g",
    "4": "l_eye",
    "5": "r_eye",
    "6": "l_brow",
    "7": "r_brow",
    "8": "l_ear",
    "9": "r_ear",
    "10": "mouth",
    "11": "u_lip",
    "12": "l_lip",
    "13": "hair",
    "14": "hat",
    "15": "ear_r",
    "16": "neck_l",
    "17": "neck",
    "18": "cloth"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "label2id": {
    "background": 0,
    "skin": 1,
    "nose": 2,
    "eye_g": 3,
    "l_eye": 4,
    "r_eye": 5,
    "l_brow": 6,
    "r_brow": 7,
    "l_ear": 8,
    "r_ear": 9,
    "mouth": 10,
    "u_lip": 11,
    "l_lip": 12,
    "hair": 13,
    "hat": 14,
    "ear_r": 15,
    "neck_l": 16,
    "neck": 17,
    "cloth": 18
  },
  "layer_norm_eps": 1e-06,
  "mlp_ratios": [
    4,
    4,
    4,
    4
  ],
  "model_type": "segformer",
  "num_attention_heads": [
    1,
    2,
    5,
    8
  ],
  "num_channels": 3,
  "num_encoder_blocks": 4,
  "patch_sizes": [
    7,
    3,
    3,
    3
  ],
  "reshape_last_stage": true,
  "semantic_loss_ignore_index": 255,
  "sr_ratios": [
    8,
    4,
    2,
    1
  ],
  "strides": [
    4,
    2,
    2,
    2
  ],
  "transformers_version": "4.37.0.dev0"
}
demo.png ADDED

Git LFS Details

  • SHA256: 31c74d29ab9e45f3401f404f7bfc09e2cf9f5825611f07dc20b25d00eb1cac8a
  • Pointer size: 131 Bytes
  • Size of remote file: 645 kB
model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c2bec795a8c243db71bd95be538fd62559003566466c71237e45c99b920f4b62
size 338580732
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6d4e67af60ff78184745ebf74cc15163c0adc27d45cdeba31e3a03d1096fb8c3
size 340316611
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5bab9bfb3cb979f3098ac3b934b1641dbf87f835e0b03c2ca6d88dcf18c83d27
size 89439678
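
Outside the browser, the ONNX exports can also be run directly with `onnxruntime`; a minimal sketch (an assumption, not documented in this repo — the `pixel_values` input name and single `logits` output follow the usual Optimum export convention):

```python
# run the quantized ONNX export with onnxruntime (sketch; see caveats above)
import numpy as np
import onnxruntime as ort
from huggingface_hub import hf_hub_download

path = hf_hub_download("jonathandinu/face-parsing", "onnx/model_quantized.onnx")
session = ort.InferenceSession(path)

# a dummy preprocessed batch: (1, 3, 512, 512) float32, as the processor produces
dummy = np.random.rand(1, 3, 512, 512).astype(np.float32)
(logits,) = session.run(None, {"pixel_values": dummy})
print(logits.shape)  # expected (1, 19, 128, 128): 19 labels at 1/4 resolution
```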
preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
{
  "do_normalize": true,
  "do_reduce_labels": false,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "SegformerFeatureExtractor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 512,
    "width": 512
  }
}
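
In words: inputs are resized to 512×512 with bilinear resampling (`"resample": 2` in PIL terms), rescaled from [0, 255] to [0, 1], and normalized with ImageNet statistics. A minimal `torchvision` equivalent, assuming a PIL `image` as in the README example (the `SegformerImageProcessor` does all of this for you):

```python
# manual equivalent of preprocessor_config.json (sketch; assumes torchvision)
from torchvision import transforms

preprocess = transforms.Compose([
    transforms.Resize((512, 512)),  # "size": 512x512, bilinear ("resample": 2)
    transforms.ToTensor(),          # rescale by 1/255 ("rescale_factor")
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # "image_mean"
                         std=[0.229, 0.224, 0.225]),  # "image_std"
])
pixel_values = preprocess(image).unsqueeze(0)  # shape (1, 3, 512, 512)
```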
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e0139f52e953a00ca01d86faf7363f067a535291a003c096dd9c56b09d8945f1
size 338821701
quantize_config.json ADDED
@@ -0,0 +1,33 @@
{
  "per_channel": true,
  "reduce_range": true,
  "per_model_config": {
    "model": {
      "op_types": [
        "Unsqueeze",
        "Shape",
        "Transpose",
        "Sqrt",
        "Gather",
        "Slice",
        "Erf",
        "Div",
        "Reshape",
        "Add",
        "Cast",
        "Sub",
        "Concat",
        "ReduceMean",
        "Mul",
        "Conv",
        "Constant",
        "Resize",
        "Softmax",
        "Pow",
        "Relu",
        "MatMul"
      ],
      "weight_type": "QUInt8"
    }
  }
}
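
These settings mirror `onnxruntime`'s dynamic quantization options; a hypothetical sketch of reproducing them (not necessarily the exact command used to produce `onnx/model_quantized.onnx`):

```python
# dynamic uint8 weight quantization matching quantize_config.json (sketch)
from onnxruntime.quantization import QuantType, quantize_dynamic

quantize_dynamic(
    "onnx/model.onnx",
    "onnx/model_quantized.onnx",
    per_channel=True,              # "per_channel": true
    reduce_range=True,             # "reduce_range": true
    weight_type=QuantType.QUInt8,  # "weight_type": "QUInt8"
)
```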