alexnasa committed · verified
Commit 7a1adef · 1 Parent(s): d047245

Update OmniAvatar/configs/model_config.py

Files changed (1)
  1. OmniAvatar/configs/model_config.py +17 -664
OmniAvatar/configs/model_config.py CHANGED
@@ -1,664 +1,17 @@
- from typing_extensions import Literal, TypeAlias
- from ..models.wan_video_dit import WanModel
- from ..models.wan_video_text_encoder import WanTextEncoder
- from ..models.wan_video_vae import WanVideoVAE
-
-
- model_loader_configs = [
-     # These configs are provided for detecting model type automatically.
-     # The format is (state_dict_keys_hash, state_dict_keys_hash_with_shape, model_names, model_classes, model_resource)
-     (None, "9269f8db9040a9d860eaca435be61814", ["wan_video_dit"], [WanModel], "civitai"),
-     (None, "aafcfd9672c3a2456dc46e1cb6e52c70", ["wan_video_dit"], [WanModel], "civitai"),
-     (None, "6bfcfb3b342cb286ce886889d519a77e", ["wan_video_dit"], [WanModel], "civitai"),
-     (None, "cb104773c6c2cb6df4f9529ad5c60d0b", ["wan_video_dit"], [WanModel], "diffusers"),
-     (None, "9c8818c2cbea55eca56c7b447df170da", ["wan_video_text_encoder"], [WanTextEncoder], "civitai"),
-     (None, "1378ea763357eea97acdef78e65d6d96", ["wan_video_vae"], [WanVideoVAE], "civitai"),
-     (None, "ccc42284ea13e1ad04693284c7a09be6", ["wan_video_vae"], [WanVideoVAE], "civitai"),
- ]
- huggingface_model_loader_configs = [
-     # These configs are provided for detecting model type automatically.
-     # The format is (architecture_in_huggingface_config, huggingface_lib, model_name, redirected_architecture)
-     ("ChatGLMModel", "diffsynth.models.kolors_text_encoder", "kolors_text_encoder", None),
-     ("MarianMTModel", "transformers.models.marian.modeling_marian", "translator", None),
-     ("BloomForCausalLM", "transformers.models.bloom.modeling_bloom", "beautiful_prompt", None),
-     ("Qwen2ForCausalLM", "transformers.models.qwen2.modeling_qwen2", "qwen_prompt", None),
-     # ("LlamaForCausalLM", "transformers.models.llama.modeling_llama", "omost_prompt", None),
-     ("T5EncoderModel", "diffsynth.models.flux_text_encoder", "flux_text_encoder_2", "FluxTextEncoder2"),
-     ("CogVideoXTransformer3DModel", "diffsynth.models.cog_dit", "cog_dit", "CogDiT"),
-     ("SiglipModel", "transformers.models.siglip.modeling_siglip", "siglip_vision_model", "SiglipVisionModel"),
-     ("LlamaForCausalLM", "diffsynth.models.hunyuan_video_text_encoder", "hunyuan_video_text_encoder_2", "HunyuanVideoLLMEncoder"),
-     ("LlavaForConditionalGeneration", "diffsynth.models.hunyuan_video_text_encoder", "hunyuan_video_text_encoder_2", "HunyuanVideoMLLMEncoder"),
-     ("Step1Model", "diffsynth.models.stepvideo_text_encoder", "stepvideo_text_encoder_2", "STEP1TextEncoder"),
- ]
-
- preset_models_on_huggingface = {
-     "HunyuanDiT": [
-         ("Tencent-Hunyuan/HunyuanDiT", "t2i/clip_text_encoder/pytorch_model.bin", "models/HunyuanDiT/t2i/clip_text_encoder"),
-         ("Tencent-Hunyuan/HunyuanDiT", "t2i/mt5/pytorch_model.bin", "models/HunyuanDiT/t2i/mt5"),
-         ("Tencent-Hunyuan/HunyuanDiT", "t2i/model/pytorch_model_ema.pt", "models/HunyuanDiT/t2i/model"),
-         ("Tencent-Hunyuan/HunyuanDiT", "t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin", "models/HunyuanDiT/t2i/sdxl-vae-fp16-fix"),
-     ],
-     "stable-video-diffusion-img2vid-xt": [
-         ("stabilityai/stable-video-diffusion-img2vid-xt", "svd_xt.safetensors", "models/stable_video_diffusion"),
-     ],
-     "ExVideo-SVD-128f-v1": [
-         ("ECNU-CILab/ExVideo-SVD-128f-v1", "model.fp16.safetensors", "models/stable_video_diffusion"),
-     ],
-     # Stable Diffusion
-     "StableDiffusion_v15": [
-         ("benjamin-paine/stable-diffusion-v1-5", "v1-5-pruned-emaonly.safetensors", "models/stable_diffusion"),
-     ],
-     "DreamShaper_8": [
-         ("Yntec/Dreamshaper8", "dreamshaper_8.safetensors", "models/stable_diffusion"),
-     ],
-     # Textual Inversion
-     "TextualInversion_VeryBadImageNegative_v1.3": [
-         ("gemasai/verybadimagenegative_v1.3", "verybadimagenegative_v1.3.pt", "models/textual_inversion"),
-     ],
-     # Stable Diffusion XL
-     "StableDiffusionXL_v1": [
-         ("stabilityai/stable-diffusion-xl-base-1.0", "sd_xl_base_1.0.safetensors", "models/stable_diffusion_xl"),
-     ],
-     "BluePencilXL_v200": [
-         ("frankjoshua/bluePencilXL_v200", "bluePencilXL_v200.safetensors", "models/stable_diffusion_xl"),
-     ],
-     "StableDiffusionXL_Turbo": [
-         ("stabilityai/sdxl-turbo", "sd_xl_turbo_1.0_fp16.safetensors", "models/stable_diffusion_xl_turbo"),
-     ],
-     # Stable Diffusion 3
-     "StableDiffusion3": [
-         ("stabilityai/stable-diffusion-3-medium", "sd3_medium_incl_clips_t5xxlfp16.safetensors", "models/stable_diffusion_3"),
-     ],
-     "StableDiffusion3_without_T5": [
-         ("stabilityai/stable-diffusion-3-medium", "sd3_medium_incl_clips.safetensors", "models/stable_diffusion_3"),
-     ],
-     # ControlNet
-     "ControlNet_v11f1p_sd15_depth": [
-         ("lllyasviel/ControlNet-v1-1", "control_v11f1p_sd15_depth.pth", "models/ControlNet"),
-         ("lllyasviel/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators")
-     ],
-     "ControlNet_v11p_sd15_softedge": [
-         ("lllyasviel/ControlNet-v1-1", "control_v11p_sd15_softedge.pth", "models/ControlNet"),
-         ("lllyasviel/Annotators", "ControlNetHED.pth", "models/Annotators")
-     ],
-     "ControlNet_v11f1e_sd15_tile": [
-         ("lllyasviel/ControlNet-v1-1", "control_v11f1e_sd15_tile.pth", "models/ControlNet")
-     ],
-     "ControlNet_v11p_sd15_lineart": [
-         ("lllyasviel/ControlNet-v1-1", "control_v11p_sd15_lineart.pth", "models/ControlNet"),
-         ("lllyasviel/Annotators", "sk_model.pth", "models/Annotators"),
-         ("lllyasviel/Annotators", "sk_model2.pth", "models/Annotators")
-     ],
-     "ControlNet_union_sdxl_promax": [
-         ("xinsir/controlnet-union-sdxl-1.0", "diffusion_pytorch_model_promax.safetensors", "models/ControlNet/controlnet_union"),
-         ("lllyasviel/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators")
-     ],
-     # AnimateDiff
-     "AnimateDiff_v2": [
-         ("guoyww/animatediff", "mm_sd_v15_v2.ckpt", "models/AnimateDiff"),
-     ],
-     "AnimateDiff_xl_beta": [
-         ("guoyww/animatediff", "mm_sdxl_v10_beta.ckpt", "models/AnimateDiff"),
-     ],
-
-     # Qwen Prompt
-     "QwenPrompt": [
-         ("Qwen/Qwen2-1.5B-Instruct", "config.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-         ("Qwen/Qwen2-1.5B-Instruct", "generation_config.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-         ("Qwen/Qwen2-1.5B-Instruct", "model.safetensors", "models/QwenPrompt/qwen2-1.5b-instruct"),
-         ("Qwen/Qwen2-1.5B-Instruct", "special_tokens_map.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-         ("Qwen/Qwen2-1.5B-Instruct", "tokenizer.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-         ("Qwen/Qwen2-1.5B-Instruct", "tokenizer_config.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-         ("Qwen/Qwen2-1.5B-Instruct", "merges.txt", "models/QwenPrompt/qwen2-1.5b-instruct"),
-         ("Qwen/Qwen2-1.5B-Instruct", "vocab.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-     ],
-     # Beautiful Prompt
-     "BeautifulPrompt": [
-         ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-         ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "generation_config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-         ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "model.safetensors", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-         ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "special_tokens_map.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-         ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "tokenizer.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-         ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "tokenizer_config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-     ],
-     # Omost prompt
-     "OmostPrompt": [
-         ("lllyasviel/omost-llama-3-8b-4bits", "model-00001-of-00002.safetensors", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-         ("lllyasviel/omost-llama-3-8b-4bits", "model-00002-of-00002.safetensors", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-         ("lllyasviel/omost-llama-3-8b-4bits", "tokenizer.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-         ("lllyasviel/omost-llama-3-8b-4bits", "tokenizer_config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-         ("lllyasviel/omost-llama-3-8b-4bits", "config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-         ("lllyasviel/omost-llama-3-8b-4bits", "generation_config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-         ("lllyasviel/omost-llama-3-8b-4bits", "model.safetensors.index.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-         ("lllyasviel/omost-llama-3-8b-4bits", "special_tokens_map.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-     ],
-     # Translator
-     "opus-mt-zh-en": [
-         ("Helsinki-NLP/opus-mt-zh-en", "config.json", "models/translator/opus-mt-zh-en"),
-         ("Helsinki-NLP/opus-mt-zh-en", "generation_config.json", "models/translator/opus-mt-zh-en"),
-         ("Helsinki-NLP/opus-mt-zh-en", "metadata.json", "models/translator/opus-mt-zh-en"),
-         ("Helsinki-NLP/opus-mt-zh-en", "pytorch_model.bin", "models/translator/opus-mt-zh-en"),
-         ("Helsinki-NLP/opus-mt-zh-en", "source.spm", "models/translator/opus-mt-zh-en"),
-         ("Helsinki-NLP/opus-mt-zh-en", "target.spm", "models/translator/opus-mt-zh-en"),
-         ("Helsinki-NLP/opus-mt-zh-en", "tokenizer_config.json", "models/translator/opus-mt-zh-en"),
-         ("Helsinki-NLP/opus-mt-zh-en", "vocab.json", "models/translator/opus-mt-zh-en"),
-     ],
-     # IP-Adapter
-     "IP-Adapter-SD": [
-         ("h94/IP-Adapter", "models/image_encoder/model.safetensors", "models/IpAdapter/stable_diffusion/image_encoder"),
-         ("h94/IP-Adapter", "models/ip-adapter_sd15.bin", "models/IpAdapter/stable_diffusion"),
-     ],
-     "IP-Adapter-SDXL": [
-         ("h94/IP-Adapter", "sdxl_models/image_encoder/model.safetensors", "models/IpAdapter/stable_diffusion_xl/image_encoder"),
-         ("h94/IP-Adapter", "sdxl_models/ip-adapter_sdxl.bin", "models/IpAdapter/stable_diffusion_xl"),
-     ],
-     "SDXL-vae-fp16-fix": [
-         ("madebyollin/sdxl-vae-fp16-fix", "diffusion_pytorch_model.safetensors", "models/sdxl-vae-fp16-fix")
-     ],
-     # Kolors
-     "Kolors": [
-         ("Kwai-Kolors/Kolors", "text_encoder/config.json", "models/kolors/Kolors/text_encoder"),
-         ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model.bin.index.json", "models/kolors/Kolors/text_encoder"),
-         ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00001-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-         ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00002-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-         ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00003-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-         ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00004-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-         ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00005-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-         ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00006-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-         ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00007-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-         ("Kwai-Kolors/Kolors", "unet/diffusion_pytorch_model.safetensors", "models/kolors/Kolors/unet"),
-         ("Kwai-Kolors/Kolors", "vae/diffusion_pytorch_model.safetensors", "models/kolors/Kolors/vae"),
-     ],
-     # FLUX
-     "FLUX.1-dev": [
-         ("black-forest-labs/FLUX.1-dev", "text_encoder/model.safetensors", "models/FLUX/FLUX.1-dev/text_encoder"),
-         ("black-forest-labs/FLUX.1-dev", "text_encoder_2/config.json", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-         ("black-forest-labs/FLUX.1-dev", "text_encoder_2/model-00001-of-00002.safetensors", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-         ("black-forest-labs/FLUX.1-dev", "text_encoder_2/model-00002-of-00002.safetensors", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-         ("black-forest-labs/FLUX.1-dev", "text_encoder_2/model.safetensors.index.json", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-         ("black-forest-labs/FLUX.1-dev", "ae.safetensors", "models/FLUX/FLUX.1-dev"),
-         ("black-forest-labs/FLUX.1-dev", "flux1-dev.safetensors", "models/FLUX/FLUX.1-dev"),
-     ],
-     "InstantX/FLUX.1-dev-IP-Adapter": {
-         "file_list": [
-             ("InstantX/FLUX.1-dev-IP-Adapter", "ip-adapter.bin", "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter"),
-             ("google/siglip-so400m-patch14-384", "model.safetensors", "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder"),
-             ("google/siglip-so400m-patch14-384", "config.json", "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder"),
-         ],
-         "load_path": [
-             "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/ip-adapter.bin",
-             "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder",
-         ],
-     },
-     # RIFE
-     "RIFE": [
-         ("AlexWortega/RIFE", "flownet.pkl", "models/RIFE"),
-     ],
-     # CogVideo
-     "CogVideoX-5B": [
-         ("THUDM/CogVideoX-5b", "text_encoder/config.json", "models/CogVideo/CogVideoX-5b/text_encoder"),
-         ("THUDM/CogVideoX-5b", "text_encoder/model.safetensors.index.json", "models/CogVideo/CogVideoX-5b/text_encoder"),
-         ("THUDM/CogVideoX-5b", "text_encoder/model-00001-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/text_encoder"),
-         ("THUDM/CogVideoX-5b", "text_encoder/model-00002-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/text_encoder"),
-         ("THUDM/CogVideoX-5b", "transformer/config.json", "models/CogVideo/CogVideoX-5b/transformer"),
-         ("THUDM/CogVideoX-5b", "transformer/diffusion_pytorch_model.safetensors.index.json", "models/CogVideo/CogVideoX-5b/transformer"),
-         ("THUDM/CogVideoX-5b", "transformer/diffusion_pytorch_model-00001-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/transformer"),
-         ("THUDM/CogVideoX-5b", "transformer/diffusion_pytorch_model-00002-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/transformer"),
-         ("THUDM/CogVideoX-5b", "vae/diffusion_pytorch_model.safetensors", "models/CogVideo/CogVideoX-5b/vae"),
-     ],
-     # Stable Diffusion 3.5
-     "StableDiffusion3.5-large": [
-         ("stabilityai/stable-diffusion-3.5-large", "sd3.5_large.safetensors", "models/stable_diffusion_3"),
-         ("stabilityai/stable-diffusion-3.5-large", "text_encoders/clip_l.safetensors", "models/stable_diffusion_3/text_encoders"),
-         ("stabilityai/stable-diffusion-3.5-large", "text_encoders/clip_g.safetensors", "models/stable_diffusion_3/text_encoders"),
-         ("stabilityai/stable-diffusion-3.5-large", "text_encoders/t5xxl_fp16.safetensors", "models/stable_diffusion_3/text_encoders"),
-     ],
- }
- preset_models_on_modelscope = {
-     # Hunyuan DiT
-     "HunyuanDiT": [
-         ("modelscope/HunyuanDiT", "t2i/clip_text_encoder/pytorch_model.bin", "models/HunyuanDiT/t2i/clip_text_encoder"),
-         ("modelscope/HunyuanDiT", "t2i/mt5/pytorch_model.bin", "models/HunyuanDiT/t2i/mt5"),
-         ("modelscope/HunyuanDiT", "t2i/model/pytorch_model_ema.pt", "models/HunyuanDiT/t2i/model"),
-         ("modelscope/HunyuanDiT", "t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin", "models/HunyuanDiT/t2i/sdxl-vae-fp16-fix"),
-     ],
-     # Stable Video Diffusion
-     "stable-video-diffusion-img2vid-xt": [
-         ("AI-ModelScope/stable-video-diffusion-img2vid-xt", "svd_xt.safetensors", "models/stable_video_diffusion"),
-     ],
-     # ExVideo
-     "ExVideo-SVD-128f-v1": [
-         ("ECNU-CILab/ExVideo-SVD-128f-v1", "model.fp16.safetensors", "models/stable_video_diffusion"),
-     ],
-     "ExVideo-CogVideoX-LoRA-129f-v1": [
-         ("ECNU-CILab/ExVideo-CogVideoX-LoRA-129f-v1", "ExVideo-CogVideoX-LoRA-129f-v1.safetensors", "models/lora"),
-     ],
-     # Stable Diffusion
-     "StableDiffusion_v15": [
-         ("AI-ModelScope/stable-diffusion-v1-5", "v1-5-pruned-emaonly.safetensors", "models/stable_diffusion"),
-     ],
-     "DreamShaper_8": [
-         ("sd_lora/dreamshaper_8", "dreamshaper_8.safetensors", "models/stable_diffusion"),
-     ],
-     "AingDiffusion_v12": [
-         ("sd_lora/aingdiffusion_v12", "aingdiffusion_v12.safetensors", "models/stable_diffusion"),
-     ],
-     "Flat2DAnimerge_v45Sharp": [
-         ("sd_lora/Flat-2D-Animerge", "flat2DAnimerge_v45Sharp.safetensors", "models/stable_diffusion"),
-     ],
-     # Textual Inversion
-     "TextualInversion_VeryBadImageNegative_v1.3": [
-         ("sd_lora/verybadimagenegative_v1.3", "verybadimagenegative_v1.3.pt", "models/textual_inversion"),
-     ],
-     # Stable Diffusion XL
-     "StableDiffusionXL_v1": [
-         ("AI-ModelScope/stable-diffusion-xl-base-1.0", "sd_xl_base_1.0.safetensors", "models/stable_diffusion_xl"),
-     ],
-     "BluePencilXL_v200": [
-         ("sd_lora/bluePencilXL_v200", "bluePencilXL_v200.safetensors", "models/stable_diffusion_xl"),
-     ],
-     "StableDiffusionXL_Turbo": [
-         ("AI-ModelScope/sdxl-turbo", "sd_xl_turbo_1.0_fp16.safetensors", "models/stable_diffusion_xl_turbo"),
-     ],
-     "SDXL_lora_zyd232_ChineseInkStyle_SDXL_v1_0": [
-         ("sd_lora/zyd232_ChineseInkStyle_SDXL_v1_0", "zyd232_ChineseInkStyle_SDXL_v1_0.safetensors", "models/lora"),
-     ],
-     # Stable Diffusion 3
-     "StableDiffusion3": [
-         ("AI-ModelScope/stable-diffusion-3-medium", "sd3_medium_incl_clips_t5xxlfp16.safetensors", "models/stable_diffusion_3"),
-     ],
-     "StableDiffusion3_without_T5": [
-         ("AI-ModelScope/stable-diffusion-3-medium", "sd3_medium_incl_clips.safetensors", "models/stable_diffusion_3"),
-     ],
-     # ControlNet
-     "ControlNet_v11f1p_sd15_depth": [
-         ("AI-ModelScope/ControlNet-v1-1", "control_v11f1p_sd15_depth.pth", "models/ControlNet"),
-         ("sd_lora/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators")
-     ],
-     "ControlNet_v11p_sd15_softedge": [
-         ("AI-ModelScope/ControlNet-v1-1", "control_v11p_sd15_softedge.pth", "models/ControlNet"),
-         ("sd_lora/Annotators", "ControlNetHED.pth", "models/Annotators")
-     ],
-     "ControlNet_v11f1e_sd15_tile": [
-         ("AI-ModelScope/ControlNet-v1-1", "control_v11f1e_sd15_tile.pth", "models/ControlNet")
-     ],
-     "ControlNet_v11p_sd15_lineart": [
-         ("AI-ModelScope/ControlNet-v1-1", "control_v11p_sd15_lineart.pth", "models/ControlNet"),
-         ("sd_lora/Annotators", "sk_model.pth", "models/Annotators"),
-         ("sd_lora/Annotators", "sk_model2.pth", "models/Annotators")
-     ],
-     "ControlNet_union_sdxl_promax": [
-         ("AI-ModelScope/controlnet-union-sdxl-1.0", "diffusion_pytorch_model_promax.safetensors", "models/ControlNet/controlnet_union"),
-         ("sd_lora/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators")
-     ],
-     "Annotators:Depth": [
-         ("sd_lora/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators"),
-     ],
-     "Annotators:Softedge": [
-         ("sd_lora/Annotators", "ControlNetHED.pth", "models/Annotators"),
-     ],
-     "Annotators:Lineart": [
-         ("sd_lora/Annotators", "sk_model.pth", "models/Annotators"),
-         ("sd_lora/Annotators", "sk_model2.pth", "models/Annotators"),
-     ],
-     "Annotators:Normal": [
-         ("sd_lora/Annotators", "scannet.pt", "models/Annotators"),
-     ],
-     "Annotators:Openpose": [
-         ("sd_lora/Annotators", "body_pose_model.pth", "models/Annotators"),
-         ("sd_lora/Annotators", "facenet.pth", "models/Annotators"),
-         ("sd_lora/Annotators", "hand_pose_model.pth", "models/Annotators"),
-     ],
-     # AnimateDiff
-     "AnimateDiff_v2": [
-         ("Shanghai_AI_Laboratory/animatediff", "mm_sd_v15_v2.ckpt", "models/AnimateDiff"),
-     ],
-     "AnimateDiff_xl_beta": [
-         ("Shanghai_AI_Laboratory/animatediff", "mm_sdxl_v10_beta.ckpt", "models/AnimateDiff"),
-     ],
-     # RIFE
-     "RIFE": [
-         ("Damo_XR_Lab/cv_rife_video-frame-interpolation", "flownet.pkl", "models/RIFE"),
-     ],
-     # Qwen Prompt
-     "QwenPrompt": {
-         "file_list": [
-             ("qwen/Qwen2-1.5B-Instruct", "config.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-             ("qwen/Qwen2-1.5B-Instruct", "generation_config.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-             ("qwen/Qwen2-1.5B-Instruct", "model.safetensors", "models/QwenPrompt/qwen2-1.5b-instruct"),
-             ("qwen/Qwen2-1.5B-Instruct", "special_tokens_map.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-             ("qwen/Qwen2-1.5B-Instruct", "tokenizer.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-             ("qwen/Qwen2-1.5B-Instruct", "tokenizer_config.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-             ("qwen/Qwen2-1.5B-Instruct", "merges.txt", "models/QwenPrompt/qwen2-1.5b-instruct"),
-             ("qwen/Qwen2-1.5B-Instruct", "vocab.json", "models/QwenPrompt/qwen2-1.5b-instruct"),
-         ],
-         "load_path": [
-             "models/QwenPrompt/qwen2-1.5b-instruct",
-         ],
-     },
-     # Beautiful Prompt
-     "BeautifulPrompt": {
-         "file_list": [
-             ("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-             ("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "generation_config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-             ("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "model.safetensors", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-             ("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "special_tokens_map.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-             ("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "tokenizer.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-             ("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "tokenizer_config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
-         ],
-         "load_path": [
-             "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd",
-         ],
-     },
-     # Omost prompt
-     "OmostPrompt": {
-         "file_list": [
-             ("Omost/omost-llama-3-8b-4bits", "model-00001-of-00002.safetensors", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-             ("Omost/omost-llama-3-8b-4bits", "model-00002-of-00002.safetensors", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-             ("Omost/omost-llama-3-8b-4bits", "tokenizer.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-             ("Omost/omost-llama-3-8b-4bits", "tokenizer_config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-             ("Omost/omost-llama-3-8b-4bits", "config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-             ("Omost/omost-llama-3-8b-4bits", "generation_config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-             ("Omost/omost-llama-3-8b-4bits", "model.safetensors.index.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-             ("Omost/omost-llama-3-8b-4bits", "special_tokens_map.json", "models/OmostPrompt/omost-llama-3-8b-4bits"),
-         ],
-         "load_path": [
-             "models/OmostPrompt/omost-llama-3-8b-4bits",
-         ],
-     },
-     # Translator
-     "opus-mt-zh-en": {
-         "file_list": [
-             ("moxying/opus-mt-zh-en", "config.json", "models/translator/opus-mt-zh-en"),
-             ("moxying/opus-mt-zh-en", "generation_config.json", "models/translator/opus-mt-zh-en"),
-             ("moxying/opus-mt-zh-en", "metadata.json", "models/translator/opus-mt-zh-en"),
-             ("moxying/opus-mt-zh-en", "pytorch_model.bin", "models/translator/opus-mt-zh-en"),
-             ("moxying/opus-mt-zh-en", "source.spm", "models/translator/opus-mt-zh-en"),
-             ("moxying/opus-mt-zh-en", "target.spm", "models/translator/opus-mt-zh-en"),
-             ("moxying/opus-mt-zh-en", "tokenizer_config.json", "models/translator/opus-mt-zh-en"),
-             ("moxying/opus-mt-zh-en", "vocab.json", "models/translator/opus-mt-zh-en"),
-         ],
-         "load_path": [
-             "models/translator/opus-mt-zh-en",
-         ],
-     },
-     # IP-Adapter
-     "IP-Adapter-SD": [
-         ("AI-ModelScope/IP-Adapter", "models/image_encoder/model.safetensors", "models/IpAdapter/stable_diffusion/image_encoder"),
-         ("AI-ModelScope/IP-Adapter", "models/ip-adapter_sd15.bin", "models/IpAdapter/stable_diffusion"),
-     ],
-     "IP-Adapter-SDXL": [
-         ("AI-ModelScope/IP-Adapter", "sdxl_models/image_encoder/model.safetensors", "models/IpAdapter/stable_diffusion_xl/image_encoder"),
-         ("AI-ModelScope/IP-Adapter", "sdxl_models/ip-adapter_sdxl.bin", "models/IpAdapter/stable_diffusion_xl"),
-     ],
-     # Kolors
-     "Kolors": {
-         "file_list": [
-             ("Kwai-Kolors/Kolors", "text_encoder/config.json", "models/kolors/Kolors/text_encoder"),
-             ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model.bin.index.json", "models/kolors/Kolors/text_encoder"),
-             ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00001-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-             ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00002-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-             ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00003-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-             ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00004-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-             ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00005-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-             ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00006-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-             ("Kwai-Kolors/Kolors", "text_encoder/pytorch_model-00007-of-00007.bin", "models/kolors/Kolors/text_encoder"),
-             ("Kwai-Kolors/Kolors", "unet/diffusion_pytorch_model.safetensors", "models/kolors/Kolors/unet"),
-             ("Kwai-Kolors/Kolors", "vae/diffusion_pytorch_model.safetensors", "models/kolors/Kolors/vae"),
-         ],
-         "load_path": [
-             "models/kolors/Kolors/text_encoder",
-             "models/kolors/Kolors/unet/diffusion_pytorch_model.safetensors",
-             "models/kolors/Kolors/vae/diffusion_pytorch_model.safetensors",
-         ],
-     },
-     "SDXL-vae-fp16-fix": [
-         ("AI-ModelScope/sdxl-vae-fp16-fix", "diffusion_pytorch_model.safetensors", "models/sdxl-vae-fp16-fix")
-     ],
-     # FLUX
-     "FLUX.1-dev": {
-         "file_list": [
-             ("AI-ModelScope/FLUX.1-dev", "text_encoder/model.safetensors", "models/FLUX/FLUX.1-dev/text_encoder"),
-             ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/config.json", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-             ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/model-00001-of-00002.safetensors", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-             ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/model-00002-of-00002.safetensors", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-             ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/model.safetensors.index.json", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-             ("AI-ModelScope/FLUX.1-dev", "ae.safetensors", "models/FLUX/FLUX.1-dev"),
-             ("AI-ModelScope/FLUX.1-dev", "flux1-dev.safetensors", "models/FLUX/FLUX.1-dev"),
-         ],
-         "load_path": [
-             "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors",
-             "models/FLUX/FLUX.1-dev/text_encoder_2",
-             "models/FLUX/FLUX.1-dev/ae.safetensors",
-             "models/FLUX/FLUX.1-dev/flux1-dev.safetensors"
-         ],
-     },
-     "FLUX.1-schnell": {
-         "file_list": [
-             ("AI-ModelScope/FLUX.1-dev", "text_encoder/model.safetensors", "models/FLUX/FLUX.1-dev/text_encoder"),
-             ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/config.json", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-             ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/model-00001-of-00002.safetensors", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-             ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/model-00002-of-00002.safetensors", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-             ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/model.safetensors.index.json", "models/FLUX/FLUX.1-dev/text_encoder_2"),
-             ("AI-ModelScope/FLUX.1-dev", "ae.safetensors", "models/FLUX/FLUX.1-dev"),
-             ("AI-ModelScope/FLUX.1-schnell", "flux1-schnell.safetensors", "models/FLUX/FLUX.1-schnell"),
-         ],
-         "load_path": [
-             "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors",
-             "models/FLUX/FLUX.1-dev/text_encoder_2",
-             "models/FLUX/FLUX.1-dev/ae.safetensors",
-             "models/FLUX/FLUX.1-schnell/flux1-schnell.safetensors"
-         ],
-     },
-     "InstantX/FLUX.1-dev-Controlnet-Union-alpha": [
-         ("InstantX/FLUX.1-dev-Controlnet-Union-alpha", "diffusion_pytorch_model.safetensors", "models/ControlNet/InstantX/FLUX.1-dev-Controlnet-Union-alpha"),
-     ],
-     "jasperai/Flux.1-dev-Controlnet-Depth": [
-         ("jasperai/Flux.1-dev-Controlnet-Depth", "diffusion_pytorch_model.safetensors", "models/ControlNet/jasperai/Flux.1-dev-Controlnet-Depth"),
-     ],
-     "jasperai/Flux.1-dev-Controlnet-Surface-Normals": [
-         ("jasperai/Flux.1-dev-Controlnet-Surface-Normals", "diffusion_pytorch_model.safetensors", "models/ControlNet/jasperai/Flux.1-dev-Controlnet-Surface-Normals"),
-     ],
-     "jasperai/Flux.1-dev-Controlnet-Upscaler": [
-         ("jasperai/Flux.1-dev-Controlnet-Upscaler", "diffusion_pytorch_model.safetensors", "models/ControlNet/jasperai/Flux.1-dev-Controlnet-Upscaler"),
-     ],
-     "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha": [
-         ("alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha", "diffusion_pytorch_model.safetensors", "models/ControlNet/alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha"),
-     ],
-     "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta": [
-         ("alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta", "diffusion_pytorch_model.safetensors", "models/ControlNet/alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta"),
-     ],
-     "Shakker-Labs/FLUX.1-dev-ControlNet-Depth": [
-         ("Shakker-Labs/FLUX.1-dev-ControlNet-Depth", "diffusion_pytorch_model.safetensors", "models/ControlNet/Shakker-Labs/FLUX.1-dev-ControlNet-Depth"),
-     ],
-     "Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro": [
-         ("Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro", "diffusion_pytorch_model.safetensors", "models/ControlNet/Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro"),
-     ],
-     "InstantX/FLUX.1-dev-IP-Adapter": {
-         "file_list": [
-             ("InstantX/FLUX.1-dev-IP-Adapter", "ip-adapter.bin", "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter"),
-             ("AI-ModelScope/siglip-so400m-patch14-384", "model.safetensors", "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder"),
-             ("AI-ModelScope/siglip-so400m-patch14-384", "config.json", "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder"),
-         ],
-         "load_path": [
-             "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/ip-adapter.bin",
-             "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder",
-         ],
-     },
-     # ESRGAN
-     "ESRGAN_x4": [
-         ("AI-ModelScope/Real-ESRGAN", "RealESRGAN_x4.pth", "models/ESRGAN"),
-     ],
-     # RIFE
-     "RIFE": [
-         ("AI-ModelScope/RIFE", "flownet.pkl", "models/RIFE"),
-     ],
-     # Omnigen
-     "OmniGen-v1": {
-         "file_list": [
-             ("BAAI/OmniGen-v1", "vae/diffusion_pytorch_model.safetensors", "models/OmniGen/OmniGen-v1/vae"),
-             ("BAAI/OmniGen-v1", "model.safetensors", "models/OmniGen/OmniGen-v1"),
-             ("BAAI/OmniGen-v1", "config.json", "models/OmniGen/OmniGen-v1"),
-             ("BAAI/OmniGen-v1", "special_tokens_map.json", "models/OmniGen/OmniGen-v1"),
-             ("BAAI/OmniGen-v1", "tokenizer_config.json", "models/OmniGen/OmniGen-v1"),
-             ("BAAI/OmniGen-v1", "tokenizer.json", "models/OmniGen/OmniGen-v1"),
-         ],
-         "load_path": [
-             "models/OmniGen/OmniGen-v1/vae/diffusion_pytorch_model.safetensors",
-             "models/OmniGen/OmniGen-v1/model.safetensors",
-         ]
-     },
-     # CogVideo
-     "CogVideoX-5B": {
-         "file_list": [
-             ("ZhipuAI/CogVideoX-5b", "text_encoder/config.json", "models/CogVideo/CogVideoX-5b/text_encoder"),
-             ("ZhipuAI/CogVideoX-5b", "text_encoder/model.safetensors.index.json", "models/CogVideo/CogVideoX-5b/text_encoder"),
-             ("ZhipuAI/CogVideoX-5b", "text_encoder/model-00001-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/text_encoder"),
-             ("ZhipuAI/CogVideoX-5b", "text_encoder/model-00002-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/text_encoder"),
-             ("ZhipuAI/CogVideoX-5b", "transformer/config.json", "models/CogVideo/CogVideoX-5b/transformer"),
-             ("ZhipuAI/CogVideoX-5b", "transformer/diffusion_pytorch_model.safetensors.index.json", "models/CogVideo/CogVideoX-5b/transformer"),
-             ("ZhipuAI/CogVideoX-5b", "transformer/diffusion_pytorch_model-00001-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/transformer"),
-             ("ZhipuAI/CogVideoX-5b", "transformer/diffusion_pytorch_model-00002-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/transformer"),
-             ("ZhipuAI/CogVideoX-5b", "vae/diffusion_pytorch_model.safetensors", "models/CogVideo/CogVideoX-5b/vae"),
-         ],
-         "load_path": [
-             "models/CogVideo/CogVideoX-5b/text_encoder",
-             "models/CogVideo/CogVideoX-5b/transformer",
-             "models/CogVideo/CogVideoX-5b/vae/diffusion_pytorch_model.safetensors",
-         ],
-     },
-     # Stable Diffusion 3.5
-     "StableDiffusion3.5-large": [
-         ("AI-ModelScope/stable-diffusion-3.5-large", "sd3.5_large.safetensors", "models/stable_diffusion_3"),
-         ("AI-ModelScope/stable-diffusion-3.5-large", "text_encoders/clip_l.safetensors", "models/stable_diffusion_3/text_encoders"),
-         ("AI-ModelScope/stable-diffusion-3.5-large", "text_encoders/clip_g.safetensors", "models/stable_diffusion_3/text_encoders"),
-         ("AI-ModelScope/stable-diffusion-3.5-large", "text_encoders/t5xxl_fp16.safetensors", "models/stable_diffusion_3/text_encoders"),
-     ],
-     "StableDiffusion3.5-medium": [
-         ("AI-ModelScope/stable-diffusion-3.5-medium", "sd3.5_medium.safetensors", "models/stable_diffusion_3"),
-         ("AI-ModelScope/stable-diffusion-3.5-large", "text_encoders/clip_l.safetensors", "models/stable_diffusion_3/text_encoders"),
-         ("AI-ModelScope/stable-diffusion-3.5-large", "text_encoders/clip_g.safetensors", "models/stable_diffusion_3/text_encoders"),
-         ("AI-ModelScope/stable-diffusion-3.5-large", "text_encoders/t5xxl_fp16.safetensors", "models/stable_diffusion_3/text_encoders"),
-     ],
-     "StableDiffusion3.5-large-turbo": [
-         ("AI-ModelScope/stable-diffusion-3.5-large-turbo", "sd3.5_large_turbo.safetensors", "models/stable_diffusion_3"),
-         ("AI-ModelScope/stable-diffusion-3.5-large", "text_encoders/clip_l.safetensors", "models/stable_diffusion_3/text_encoders"),
-         ("AI-ModelScope/stable-diffusion-3.5-large", "text_encoders/clip_g.safetensors", "models/stable_diffusion_3/text_encoders"),
-         ("AI-ModelScope/stable-diffusion-3.5-large", "text_encoders/t5xxl_fp16.safetensors", "models/stable_diffusion_3/text_encoders"),
-     ],
-     "HunyuanVideo": {
-         "file_list": [
-             ("AI-ModelScope/clip-vit-large-patch14", "model.safetensors", "models/HunyuanVideo/text_encoder"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "model-00001-of-00004.safetensors", "models/HunyuanVideo/text_encoder_2"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "model-00002-of-00004.safetensors", "models/HunyuanVideo/text_encoder_2"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "model-00003-of-00004.safetensors", "models/HunyuanVideo/text_encoder_2"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "model-00004-of-00004.safetensors", "models/HunyuanVideo/text_encoder_2"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "config.json", "models/HunyuanVideo/text_encoder_2"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "model.safetensors.index.json", "models/HunyuanVideo/text_encoder_2"),
-             ("AI-ModelScope/HunyuanVideo", "hunyuan-video-t2v-720p/vae/pytorch_model.pt", "models/HunyuanVideo/vae"),
-             ("AI-ModelScope/HunyuanVideo", "hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states.pt", "models/HunyuanVideo/transformers")
-         ],
-         "load_path": [
-             "models/HunyuanVideo/text_encoder/model.safetensors",
-             "models/HunyuanVideo/text_encoder_2",
-             "models/HunyuanVideo/vae/pytorch_model.pt",
-             "models/HunyuanVideo/transformers/mp_rank_00_model_states.pt"
-         ],
-     },
-     "HunyuanVideoI2V": {
-         "file_list": [
-             ("AI-ModelScope/clip-vit-large-patch14", "model.safetensors", "models/HunyuanVideoI2V/text_encoder"),
-             ("AI-ModelScope/llava-llama-3-8b-v1_1-transformers", "model-00001-of-00004.safetensors", "models/HunyuanVideoI2V/text_encoder_2"),
-             ("AI-ModelScope/llava-llama-3-8b-v1_1-transformers", "model-00002-of-00004.safetensors", "models/HunyuanVideoI2V/text_encoder_2"),
-             ("AI-ModelScope/llava-llama-3-8b-v1_1-transformers", "model-00003-of-00004.safetensors", "models/HunyuanVideoI2V/text_encoder_2"),
-             ("AI-ModelScope/llava-llama-3-8b-v1_1-transformers", "model-00004-of-00004.safetensors", "models/HunyuanVideoI2V/text_encoder_2"),
-             ("AI-ModelScope/llava-llama-3-8b-v1_1-transformers", "config.json", "models/HunyuanVideoI2V/text_encoder_2"),
-             ("AI-ModelScope/llava-llama-3-8b-v1_1-transformers", "model.safetensors.index.json", "models/HunyuanVideoI2V/text_encoder_2"),
-             ("AI-ModelScope/HunyuanVideo-I2V", "hunyuan-video-i2v-720p/vae/pytorch_model.pt", "models/HunyuanVideoI2V/vae"),
-             ("AI-ModelScope/HunyuanVideo-I2V", "hunyuan-video-i2v-720p/transformers/mp_rank_00_model_states.pt", "models/HunyuanVideoI2V/transformers")
-         ],
-         "load_path": [
-             "models/HunyuanVideoI2V/text_encoder/model.safetensors",
-             "models/HunyuanVideoI2V/text_encoder_2",
-             "models/HunyuanVideoI2V/vae/pytorch_model.pt",
-             "models/HunyuanVideoI2V/transformers/mp_rank_00_model_states.pt"
-         ],
-     },
-     "HunyuanVideo-fp8": {
-         "file_list": [
-             ("AI-ModelScope/clip-vit-large-patch14", "model.safetensors", "models/HunyuanVideo/text_encoder"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "model-00001-of-00004.safetensors", "models/HunyuanVideo/text_encoder_2"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "model-00002-of-00004.safetensors", "models/HunyuanVideo/text_encoder_2"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "model-00003-of-00004.safetensors", "models/HunyuanVideo/text_encoder_2"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "model-00004-of-00004.safetensors", "models/HunyuanVideo/text_encoder_2"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "config.json", "models/HunyuanVideo/text_encoder_2"),
-             ("DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", "model.safetensors.index.json", "models/HunyuanVideo/text_encoder_2"),
-             ("AI-ModelScope/HunyuanVideo", "hunyuan-video-t2v-720p/vae/pytorch_model.pt", "models/HunyuanVideo/vae"),
-             ("DiffSynth-Studio/HunyuanVideo-safetensors", "model.fp8.safetensors", "models/HunyuanVideo/transformers")
-         ],
-         "load_path": [
-             "models/HunyuanVideo/text_encoder/model.safetensors",
-             "models/HunyuanVideo/text_encoder_2",
-             "models/HunyuanVideo/vae/pytorch_model.pt",
-             "models/HunyuanVideo/transformers/model.fp8.safetensors"
-         ],
-     },
- }
- Preset_model_id: TypeAlias = Literal[
-     "HunyuanDiT",
-     "stable-video-diffusion-img2vid-xt",
-     "ExVideo-SVD-128f-v1",
-     "ExVideo-CogVideoX-LoRA-129f-v1",
-     "StableDiffusion_v15",
-     "DreamShaper_8",
-     "AingDiffusion_v12",
-     "Flat2DAnimerge_v45Sharp",
-     "TextualInversion_VeryBadImageNegative_v1.3",
-     "StableDiffusionXL_v1",
-     "BluePencilXL_v200",
-     "StableDiffusionXL_Turbo",
-     "ControlNet_v11f1p_sd15_depth",
-     "ControlNet_v11p_sd15_softedge",
-     "ControlNet_v11f1e_sd15_tile",
-     "ControlNet_v11p_sd15_lineart",
-     "AnimateDiff_v2",
-     "AnimateDiff_xl_beta",
-     "RIFE",
-     "BeautifulPrompt",
-     "opus-mt-zh-en",
-     "IP-Adapter-SD",
-     "IP-Adapter-SDXL",
-     "StableDiffusion3",
-     "StableDiffusion3_without_T5",
-     "Kolors",
-     "SDXL-vae-fp16-fix",
-     "ControlNet_union_sdxl_promax",
-     "FLUX.1-dev",
-     "FLUX.1-schnell",
-     "InstantX/FLUX.1-dev-Controlnet-Union-alpha",
-     "jasperai/Flux.1-dev-Controlnet-Depth",
-     "jasperai/Flux.1-dev-Controlnet-Surface-Normals",
-     "jasperai/Flux.1-dev-Controlnet-Upscaler",
-     "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha",
-     "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta",
-     "Shakker-Labs/FLUX.1-dev-ControlNet-Depth",
-     "Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro",
-     "InstantX/FLUX.1-dev-IP-Adapter",
-     "SDXL_lora_zyd232_ChineseInkStyle_SDXL_v1_0",
-     "QwenPrompt",
-     "OmostPrompt",
-     "ESRGAN_x4",
-     "RIFE",
-     "OmniGen-v1",
-     "CogVideoX-5B",
-     "Annotators:Depth",
-     "Annotators:Softedge",
-     "Annotators:Lineart",
-     "Annotators:Normal",
-     "Annotators:Openpose",
-     "StableDiffusion3.5-large",
-     "StableDiffusion3.5-medium",
-     "HunyuanVideo",
-     "HunyuanVideo-fp8",
-     "HunyuanVideoI2V",
- ]
+ from typing_extensions import Literal, TypeAlias
+ from ..models.wan_video_dit import WanModel
+ from ..models.wan_video_text_encoder import WanTextEncoder
+ from ..models.wan_video_vae import WanVideoVAE
+
+
+ model_loader_configs = [
+     # These configs are provided for detecting model type automatically.
+     # The format is (state_dict_keys_hash, state_dict_keys_hash_with_shape, model_names, model_classes, model_resource)
+     (None, "9269f8db9040a9d860eaca435be61814", ["wan_video_dit"], [WanModel], "civitai"),
+     (None, "aafcfd9672c3a2456dc46e1cb6e52c70", ["wan_video_dit"], [WanModel], "civitai"),
+     (None, "6bfcfb3b342cb286ce886889d519a77e", ["wan_video_dit"], [WanModel], "civitai"),
+     (None, "cb104773c6c2cb6df4f9529ad5c60d0b", ["wan_video_dit"], [WanModel], "diffusers"),
+     (None, "9c8818c2cbea55eca56c7b447df170da", ["wan_video_text_encoder"], [WanTextEncoder], "civitai"),
+     (None, "1378ea763357eea97acdef78e65d6d96", ["wan_video_vae"], [WanVideoVAE], "civitai"),
+     (None, "ccc42284ea13e1ad04693284c7a09be6", ["wan_video_vae"], [WanVideoVAE], "civitai"),
+ ]
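For context, the surviving `model_loader_configs` entries are matched against a checkpoint's state-dict fingerprint to pick the right model class. A minimal sketch of that lookup follows; the md5-over-sorted-keys hashing scheme and the `detect_model_config` helper are illustrative assumptions, not the repository's actual implementation:

import hashlib

def hash_state_dict_keys(state_dict, with_shape=True):
    # Fingerprint a checkpoint by hashing its sorted parameter names,
    # optionally including each tensor's shape (assumed scheme).
    parts = []
    for name in sorted(state_dict):
        if with_shape:
            parts.append(f"{name}:{tuple(state_dict[name].shape)}")
        else:
            parts.append(name)
    return hashlib.md5(",".join(parts).encode("utf-8")).hexdigest()

def detect_model_config(state_dict, configs=model_loader_configs):
    # Return (model_names, model_classes, model_resource) for the first
    # registered config whose hash matches the checkpoint's fingerprint.
    keys_hash = hash_state_dict_keys(state_dict, with_shape=False)
    keys_hash_with_shape = hash_state_dict_keys(state_dict, with_shape=True)
    for cfg_hash, cfg_hash_with_shape, names, classes, resource in configs:
        if cfg_hash is not None and cfg_hash == keys_hash:
            return names, classes, resource
        if cfg_hash_with_shape == keys_hash_with_shape:
            return names, classes, resource
    return None

Under these assumptions, a Wan video DiT checkpoint loaded with `torch.load` or `safetensors` would hash to one of the registered `wan_video_dit` entries, and the loader would instantiate `WanModel` with the matching "civitai" or "diffusers" weight-naming convention.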