shepnerd committed on
Commit
9d9e61a
·
verified ·
1 Parent(s): 6a7a45e

Push model using huggingface_hub.

Browse files
Files changed (3) hide show
  1. README.md +11 -0
  2. config.json +497 -0
  3. model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ pipeline_tag: video feature extraction
4
+ tags:
5
+ - model_hub_mixin
6
+ - pytorch_model_hub_mixin
7
+ ---
8
+
9
+ This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
10
+ - Library: [More Information Needed]
11
+ - Docs: [More Information Needed]
config.json ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "TextEncoders": {
3
+ "bert": {
4
+ "config": "configs/config_bert.json",
5
+ "d_model": 768,
6
+ "fusion_layer": 9,
7
+ "name": "bert_base",
8
+ "pretrained": "bert-base-uncased"
9
+ },
10
+ "bert_large": {
11
+ "config": "configs/config_bert_large.json",
12
+ "d_model": 1024,
13
+ "fusion_layer": 19,
14
+ "name": "bert_large",
15
+ "pretrained": "bert-large-uncased"
16
+ },
17
+ "med_bert": {
18
+ "config": "configs/med_config.json",
19
+ "d_model": 768,
20
+ "name": "med_bert_base",
21
+ "pretrained": "bert-base-uncased"
22
+ },
23
+ "med_bert_large": {
24
+ "config": "configs/med_large_config.json",
25
+ "d_model": 768,
26
+ "name": "med_bert_large",
27
+ "pretrained": "bert-base-uncased"
28
+ }
29
+ },
30
+ "VisionEncoders": {},
31
+ "auto_resume": true,
32
+ "available_corpus": {
33
+ "anet_ret_train": {
34
+ "anno_path": "your_path",
35
+ "data_root": "",
36
+ "is_paragraph_retrieval": true,
37
+ "max_txt_l": 150,
38
+ "media_type": "video"
39
+ },
40
+ "anet_ret_val": {
41
+ "anno_path": "your_path",
42
+ "data_root": "",
43
+ "is_paragraph_retrieval": true,
44
+ "max_txt_l": 150,
45
+ "media_type": "video"
46
+ },
47
+ "audiocaps_ret_test": {
48
+ "anno_path": "your_path",
49
+ "data_root": "",
50
+ "media_type": "audio"
51
+ },
52
+ "audiocaps_ret_train": {
53
+ "anno_path": "your_path",
54
+ "data_root": "",
55
+ "media_type": "audio"
56
+ },
57
+ "cc12m": {
58
+ "anno_path": "your_path",
59
+ "data_root": "",
60
+ "media_type": "image"
61
+ },
62
+ "cc3m": {
63
+ "anno_path": "your_path",
64
+ "data_root": "",
65
+ "media_type": "image"
66
+ },
67
+ "cc3m_debug": {
68
+ "anno_path": "your_path",
69
+ "data_root": "",
70
+ "media_type": "image"
71
+ },
72
+ "charades_mc_test": {
73
+ "anno_path": "your_path",
74
+ "data_root": "",
75
+ "media_type": "video"
76
+ },
77
+ "clothov1_ret_test": {
78
+ "anno_path": "your_path",
79
+ "data_root": "",
80
+ "media_type": "audio"
81
+ },
82
+ "clothov1_ret_train": {
83
+ "anno_path": "your_path",
84
+ "data_root": "",
85
+ "media_type": "audio"
86
+ },
87
+ "clothov2_ret_test": {
88
+ "anno_path": "your_path",
89
+ "data_root": "",
90
+ "media_type": "audio"
91
+ },
92
+ "clothov2_ret_train": {
93
+ "anno_path": "your_path",
94
+ "data_root": "",
95
+ "media_type": "audio"
96
+ },
97
+ "coco": {
98
+ "anno_path": "your_path",
99
+ "data_root": "",
100
+ "jump_filter": true,
101
+ "media_type": "image"
102
+ },
103
+ "data_25m": [
104
+ {
105
+ "anno_path": "your_path",
106
+ "data_root": "",
107
+ "media_type": "video"
108
+ },
109
+ {
110
+ "anno_path": "your_path",
111
+ "data_root": "",
112
+ "media_type": "image"
113
+ },
114
+ {
115
+ "anno_path": "your_path",
116
+ "data_root": "",
117
+ "jump_filter": true,
118
+ "media_type": "image"
119
+ },
120
+ {
121
+ "anno_path": "your_path",
122
+ "data_root": "",
123
+ "jump_filter": true,
124
+ "media_type": "image"
125
+ },
126
+ {
127
+ "anno_path": "your_path",
128
+ "data_root": "",
129
+ "media_type": "image"
130
+ },
131
+ {
132
+ "anno_path": "your_path",
133
+ "data_root": "",
134
+ "media_type": "image"
135
+ }
136
+ ],
137
+ "debug": [
138
+ {
139
+ "anno_path": "your_path",
140
+ "data_root": "",
141
+ "media_type": "image"
142
+ },
143
+ {
144
+ "anno_path": "your_path",
145
+ "data_root": "",
146
+ "media_type": "video"
147
+ }
148
+ ],
149
+ "didemo_ret_test": {
150
+ "anno_path": "your_path",
151
+ "data_root": "",
152
+ "is_paragraph_retrieval": true,
153
+ "max_txt_l": 64,
154
+ "media_type": "video",
155
+ "trimmed30": true
156
+ },
157
+ "didemo_ret_train": {
158
+ "anno_path": "your_path",
159
+ "data_root": "",
160
+ "is_paragraph_retrieval": true,
161
+ "max_txt_l": 64,
162
+ "media_type": "video",
163
+ "trimmed30": true
164
+ },
165
+ "didemo_ret_val": {
166
+ "anno_path": "your_path",
167
+ "data_root": "",
168
+ "is_paragraph_retrieval": true,
169
+ "max_txt_l": 64,
170
+ "media_type": "video",
171
+ "trimmed30": true
172
+ },
173
+ "hmdb51_act_val": {
174
+ "anno_path": "your_path",
175
+ "data_root": "",
176
+ "is_act_rec": true,
177
+ "media_type": "video"
178
+ },
179
+ "internvid_v1": {
180
+ "anno_path": "your_path",
181
+ "data_root": "",
182
+ "jump_filter": true,
183
+ "media_type": "video"
184
+ },
185
+ "internvid_v2_avs_private": {
186
+ "anno_path": "your_path",
187
+ "caption_augmentation": {
188
+ "caption_sample_type": "avs_all"
189
+ },
190
+ "data_root": "",
191
+ "jump_filter": true,
192
+ "media_type": "audio_video",
193
+ "read_audio_from_video": true,
194
+ "read_clip_from_video": false,
195
+ "zero_audio_padding_for_video": true
196
+ },
197
+ "k400_act_val": {
198
+ "anno_path": "your_path",
199
+ "data_root": "",
200
+ "is_act_rec": true
201
+ },
202
+ "k600_act_val": {
203
+ "anno_path": "your_path",
204
+ "data_root": "",
205
+ "is_act_rec": true,
206
+ "media_type": "video"
207
+ },
208
+ "k700_act_val": {
209
+ "anno_path": "your_path",
210
+ "data_root": "",
211
+ "is_act_rec": true,
212
+ "media_type": "video"
213
+ },
214
+ "laion_2b": {
215
+ "anno_path": "your_path",
216
+ "data_root": "",
217
+ "jump_filter": true,
218
+ "media_type": "image"
219
+ },
220
+ "laion_coco": {
221
+ "anno_path": "your_path",
222
+ "data_root": "",
223
+ "jump_filter": true,
224
+ "media_type": "image"
225
+ },
226
+ "laion_pop": {
227
+ "anno_path": "your_path",
228
+ "data_root": "",
229
+ "jump_filter": true,
230
+ "media_type": "image"
231
+ },
232
+ "lsmdc_ret_test_1000": {
233
+ "anno_path": "your_path",
234
+ "data_root": "",
235
+ "media_type": "video"
236
+ },
237
+ "lsmdc_ret_train": {
238
+ "anno_path": "your_path",
239
+ "data_root": "",
240
+ "max_txt_l": 96,
241
+ "media_type": "video"
242
+ },
243
+ "lsmdc_ret_val": {
244
+ "anno_path": "your_path",
245
+ "data_root": "",
246
+ "max_txt_l": 96,
247
+ "media_type": "video"
248
+ },
249
+ "mit_act_val": {
250
+ "anno_path": "your_path",
251
+ "data_root": "",
252
+ "is_act_rec": true,
253
+ "media_type": "video"
254
+ },
255
+ "msrvtt_1k_test": {
256
+ "anno_path": "your_path",
257
+ "data_root": "",
258
+ "media_type": "video"
259
+ },
260
+ "msrvtt_ret_test1k": {
261
+ "anno_path": "your_path",
262
+ "data_root": "",
263
+ "media_type": "video"
264
+ },
265
+ "msrvtt_ret_train9k": {
266
+ "anno_path": "your_path",
267
+ "data_root": "",
268
+ "media_type": "video"
269
+ },
270
+ "msvd_ret_test": {
271
+ "anno_path": "your_path",
272
+ "data_root": "",
273
+ "max_txt_l": 64,
274
+ "media_type": "video"
275
+ },
276
+ "msvd_ret_train": {
277
+ "anno_path": "your_path",
278
+ "data_root": "",
279
+ "has_multi_txt_gt": true,
280
+ "max_txt_l": 64,
281
+ "media_type": "video"
282
+ },
283
+ "msvd_ret_val": {
284
+ "anno_path": "your_path",
285
+ "data_root": "",
286
+ "max_txt_l": 64,
287
+ "media_type": "video"
288
+ },
289
+ "pretrain_example_data_1B": [
290
+ {
291
+ "anno_path": "your_path",
292
+ "data_root": "",
293
+ "media_type": "image"
294
+ },
295
+ {
296
+ "anno_path": "your_path",
297
+ "data_root": "",
298
+ "media_type": "video"
299
+ }
300
+ ],
301
+ "pretrain_example_data_6B": [
302
+ {
303
+ "anno_path": "your_path",
304
+ "data_root": "",
305
+ "media_type": "image"
306
+ },
307
+ {
308
+ "anno_path": "your_path",
309
+ "data_root": "",
310
+ "media_type": "video"
311
+ },
312
+ {
313
+ "anno_path": "your_path",
314
+ "caption_augmentation": {
315
+ "caption_sample_type": "avs_all"
316
+ },
317
+ "data_root": "",
318
+ "jump_filter": true,
319
+ "media_type": "audio_video",
320
+ "read_audio_from_video": true,
321
+ "read_clip_from_video": false,
322
+ "zero_audio_padding_for_video": true
323
+ }
324
+ ],
325
+ "sbu": {
326
+ "anno_path": "your_path",
327
+ "data_root": "",
328
+ "media_type": "image"
329
+ },
330
+ "ssv2_mc_val": {
331
+ "anno_path": "your_path",
332
+ "data_root": "",
333
+ "media_type": "video"
334
+ },
335
+ "ucf101_act_val": {
336
+ "anno_path": "your_path",
337
+ "data_root": "",
338
+ "is_act_rec": true,
339
+ "media_type": "video"
340
+ },
341
+ "vatex_ch_ret_val": {
342
+ "anno_path": "your_path",
343
+ "data_root": "",
344
+ "media_type": "video"
345
+ },
346
+ "vatex_en_ret_train": {
347
+ "anno_path": "your_path",
348
+ "data_root": "",
349
+ "has_multi_txt_gt": true,
350
+ "media_type": "video"
351
+ },
352
+ "vatex_en_ret_val": {
353
+ "anno_path": "your_path",
354
+ "data_root": "",
355
+ "media_type": "video"
356
+ },
357
+ "vg": {
358
+ "anno_path": "your_path",
359
+ "data_root": "",
360
+ "jump_filter": true,
361
+ "media_type": "image"
362
+ },
363
+ "wavcaps_400k": {
364
+ "anno_path": "your_path",
365
+ "data_root": "",
366
+ "media_type": "audio"
367
+ },
368
+ "webvid": {
369
+ "anno_path": "your_path",
370
+ "data_root": "",
371
+ "media_type": "video"
372
+ },
373
+ "webvid_10m": {
374
+ "anno_path": "your_path",
375
+ "data_root": "",
376
+ "media_type": "video"
377
+ },
378
+ "webvid_debug": {
379
+ "anno_path": "your_path",
380
+ "data_root": "",
381
+ "media_type": "video"
382
+ },
383
+ "webvid_fuse_10m": {
384
+ "anno_path": "your_path",
385
+ "data_root": "",
386
+ "jump_filter": true,
387
+ "media_type": "video"
388
+ }
389
+ },
390
+ "batch_size": 8,
391
+ "batch_size_test": 4,
392
+ "compile_model": false,
393
+ "debug": false,
394
+ "deep_fusion": false,
395
+ "deepspeed": {
396
+ "enable": true,
397
+ "stage": 1
398
+ },
399
+ "device": "cuda",
400
+ "dist_url": "env://",
401
+ "evaluate": true,
402
+ "evaluation": {
403
+ "eval_frame_ensemble": "concat",
404
+ "eval_offload": true,
405
+ "eval_x_only": false,
406
+ "k_test": 128
407
+ },
408
+ "gradient_checkpointing": true,
409
+ "inputs": {
410
+ "batch_size": {
411
+ "image": 8,
412
+ "video": 8
413
+ },
414
+ "batch_size_test": {
415
+ "image": 4,
416
+ "video": 4
417
+ },
418
+ "image_res": 224,
419
+ "max_txt_l": {
420
+ "image": 40,
421
+ "video": 40
422
+ },
423
+ "video_input": {
424
+ "num_frames": 4,
425
+ "num_frames_test": 4,
426
+ "random_aug": false,
427
+ "sample_type": "rand",
428
+ "sample_type_test": "middle"
429
+ }
430
+ },
431
+ "is_pretrain": true,
432
+ "jump_evaluate": false,
433
+ "log_freq": 100,
434
+ "max_txt_l": 40,
435
+ "mode": "pt",
436
+ "model": {
437
+ "embed_dim": 512,
438
+ "find_unused_parameters": false,
439
+ "model_cls": "InternVideo2_Stage2",
440
+ "multimodal": {
441
+ "enable": true
442
+ },
443
+ "temp": 0.07,
444
+ "text_encoder": {
445
+ "config": "configs/config_bert_large.json",
446
+ "d_model": 1024,
447
+ "fusion_layer": 19,
448
+ "name": "bert_large",
449
+ "pretrained": "bert-large-uncased"
450
+ },
451
+ "vision_encoder": {
452
+ "checkpoint_num": 40,
453
+ "clip_embed_dim": 768,
454
+ "clip_input_resolution": 224,
455
+ "clip_norm_type": "l2",
456
+ "clip_return_layer": 6,
457
+ "clip_student_return_interval": 1,
458
+ "clip_teacher": null,
459
+ "clip_teacher_embed_dim": 3200,
460
+ "clip_teacher_final_dim": 768,
461
+ "clip_teacher_return_interval": 1,
462
+ "d_model": 1408,
463
+ "image_mask_ratio": 0.5,
464
+ "image_mask_type": "random",
465
+ "img_size": 224,
466
+ "keep_temporal": false,
467
+ "name": "pretrain_internvideo2_6b_patch14_224",
468
+ "num_frames": 4,
469
+ "only_mask": true,
470
+ "patch_size": 14,
471
+ "pretrained": "/mnt/petrelfs/wangyi/internvideo2_ckpt/internvideo2-s2_6b-224p-f4.pt",
472
+ "sep_image_video_pos_embed": true,
473
+ "tubelet_size": 1,
474
+ "use_checkpoint": true,
475
+ "use_flash_attn": false,
476
+ "use_fused_mlp": false,
477
+ "use_fused_rmsnorm": false,
478
+ "video_mask_ratio": 0.8,
479
+ "video_mask_type": "random"
480
+ }
481
+ },
482
+ "num_frames": 4,
483
+ "num_frames_test": 4,
484
+ "num_workers": 6,
485
+ "origin_num_frames": 4,
486
+ "output_dir": null,
487
+ "pretrained_path": "",
488
+ "resume": false,
489
+ "save_latest": false,
490
+ "seed": 42,
491
+ "size_t": 224,
492
+ "text_enc": "bert_large",
493
+ "use_bf16": false,
494
+ "use_flash_sdp": false,
495
+ "use_half_precision": false,
496
+ "use_mem_efficient_sdp": false
497
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ca222644b70507c092811e5b9e498f07b0d52afd2c313fe527b205afd998a9a
3
+ size 25466142320