Spaces: Running on Zero
Upload folder using huggingface_hub
- .gitattributes +16 -0
- .gitignore +4 -0
- README.md +1 -1
- app.py +84 -59
- examples/10309844035.mp4 +3 -0
- examples/13887487955.mp4 +3 -0
- examples/4167294363.mp4 +3 -0
- examples/4742652230.mp4 +3 -0
- examples/4766274786.mp4 +3 -0
- examples/5012237466.mp4 +3 -0
- examples/5188348585.mp4 +3 -0
- examples/9383140374.mp4 +3 -0
- examples/DTInxNfWXVc_210.0_360.0.mp4 +3 -0
- examples/RoripwjYFp8_210.0_360.0.mp4 +3 -0
- examples/UFWQKrcbhjI_360.0_510.0.mp4 +3 -0
- examples/Z3-IZ3HAmIA_60.0_210.0.mp4 +3 -0
- examples/h6QKDqomIPk_210.0_360.0.mp4 +3 -0
- examples/pA6Z-qYhSNg_60.0_210.0.mp4 +3 -0
- examples/rrTIeJRVGjg_60.0_210.0.mp4 +3 -0
- examples/yId2wIocTys_210.0_360.0.mp4 +3 -0
- requirements.txt +6 -2
- setup.cfg +1 -1
.gitattributes
CHANGED
@@ -49,3 +49,19 @@ data/h6QKDqomIPk_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
data/pA6Z-qYhSNg_60.0_210.0.mp4 filter=lfs diff=lfs merge=lfs -text
data/rrTIeJRVGjg_60.0_210.0.mp4 filter=lfs diff=lfs merge=lfs -text
data/yId2wIocTys_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/10309844035.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/13887487955.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/4167294363.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/4742652230.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/4766274786.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/5012237466.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/5188348585.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/9383140374.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/DTInxNfWXVc_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/RoripwjYFp8_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/UFWQKrcbhjI_360.0_510.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/Z3-IZ3HAmIA_60.0_210.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/h6QKDqomIPk_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/pA6Z-qYhSNg_60.0_210.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/rrTIeJRVGjg_60.0_210.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/yId2wIocTys_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore
CHANGED
@@ -5,5 +5,9 @@ __pycache__
*$py.class

# Temporary data
+/data*
+/demo/examples
+/model_zoo
+/work_dirs*
.DS_Store
._*
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 💡
colorFrom: red
colorTo: blue
sdk: gradio
-sdk_version:
+sdk_version: 5.15.0
app_file: app.py
pinned: true
license: bsd-3-clause
app.py
CHANGED
@@ -5,60 +5,92 @@ import json
import os
import random
import time
-from functools import partial

import gradio as gr
import nncore
+import spaces
import torch
from huggingface_hub import snapshot_download

-import spaces
from videomind.constants import GROUNDER_PROMPT, PLANNER_PROMPT, VERIFIER_PROMPT
from videomind.dataset.utils import process_vision_info
from videomind.model.builder import build_model
from videomind.utils.io import get_duration
from videomind.utils.parser import parse_query, parse_span

+os.environ['TOKENIZERS_PARALLELISM'] = 'false'
+
+PATH = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
+
BASE_MODEL = 'model_zoo/Qwen2-VL-2B-Instruct'
-
+BASE_MODEL_REPO = 'Qwen/Qwen2-VL-2B-Instruct'

MODEL = 'model_zoo/VideoMind-2B'
-
+MODEL_REPO = 'yeliudev/VideoMind-2B'

TITLE = 'VideoMind: A Chain-of-LoRA Agent for Long Video Reasoning'

-
-
+BADGE = """
+<h3 align="center" style="margin-top: -0.5em;">A Chain-of-LoRA Agent for Long Video Reasoning</h3>
+<div style="display: flex; justify-content: center; gap: 5px; margin-bottom: -0.7em !important;">
+<a href="https://arxiv.org/abs/2503.13444" target="_blank">
+<img src="https://img.shields.io/badge/arXiv-2503.13444-red">
+</a>
+<a href="https://videomind.github.io/" target="_blank">
+<img src="https://img.shields.io/badge/Project-Page-brightgreen">
+</a>
+<a href="https://huggingface.co/collections/yeliudev/videomind-67dd41f42c57f0e7433afb36" target="_blank">
+<img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue">
+</a>
+<a href="https://huggingface.co/datasets/yeliudev/VideoMind-Dataset" target="_blank">
+<img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Dataset-orange">
+</a>
+<a href="https://github.com/yeliudev/VideoMind/blob/main/README.md" target="_blank">
+<img src="https://img.shields.io/badge/License-BSD--3--Clause-purple">
+</a>
+</div>
+"""
+
+LOGO = '<p align="center"><img width="350" src="https://raw.githubusercontent.com/yeliudev/VideoMind/refs/heads/main/.github/logo.png"></p>'
+DISC = '**VideoMind** is a multi-modal agent framework that enhances video reasoning by emulating *human-like* processes, such as *breaking down tasks*, *localizing and verifying moments*, and *synthesizing answers*. This approach addresses the unique challenges of temporal-grounded reasoning in a progressive strategy. This demo showcases how VideoMind-2B handles video-language tasks. Please open an <a href="https://github.com/yeliudev/VideoMind/issues/new" target="_blank">issue</a> if you meet any problems or have any suggestions.' # noqa

# yapf:disable
EXAMPLES = [
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    [f'{PATH}/examples/4167294363.mp4', 'Why did the old man stand up?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/5012237466.mp4', 'How does the child in stripes react about the fountain?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/13887487955.mp4', 'What did the excavator do after it pushed the cement forward?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/5188348585.mp4', 'What did the person do before pouring the liquor?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/4766274786.mp4', 'What did the girl do after the baby lost the balloon?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/4742652230.mp4', 'Why is the girl pushing the boy only around the toy but not to other places?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/9383140374.mp4', 'How does the girl in pink control the movement of the claw?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/10309844035.mp4', 'Why are they holding up the phones?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/pA6Z-qYhSNg_60.0_210.0.mp4', 'Different types of meat products are being cut, shaped and prepared', ['gnd', 'ver']],
+    [f'{PATH}/examples/UFWQKrcbhjI_360.0_510.0.mp4', 'A man talks to the camera whilst walking along a roadside in a rural area', ['gnd', 'ver']],
+    [f'{PATH}/examples/RoripwjYFp8_210.0_360.0.mp4', 'A woman wearing glasses eating something at a street market', ['gnd', 'ver']],
+    [f'{PATH}/examples/h6QKDqomIPk_210.0_360.0.mp4', 'A toddler sits in his car seat, holding his yellow tablet', ['gnd', 'ver']],
+    [f'{PATH}/examples/Z3-IZ3HAmIA_60.0_210.0.mp4', 'A view from the window as the plane accelerates and takes off from the runway', ['gnd', 'ver']],
+    [f'{PATH}/examples/yId2wIocTys_210.0_360.0.mp4', "Temporally locate the visual content mentioned in the text query 'kids exercise in front of parked cars' within the video.", ['pla', 'gnd', 'ver']],
+    [f'{PATH}/examples/rrTIeJRVGjg_60.0_210.0.mp4', "Localize the moment that provides relevant context about 'man stands in front of a white building monologuing'.", ['pla', 'gnd', 'ver']],
+    [f'{PATH}/examples/DTInxNfWXVc_210.0_360.0.mp4', "Find the video segment that corresponds to the given textual query 'man with headphones talking'.", ['pla', 'gnd', 'ver']],
]
# yapf:enable

-
+if not nncore.is_dir(BASE_MODEL):
+    snapshot_download(BASE_MODEL_REPO, local_dir=BASE_MODEL)

-
-
-
-
-
-
+if not nncore.is_dir(MODEL):
+    snapshot_download(MODEL_REPO, local_dir=MODEL)
+
+print('Initializing role *grounder*')
+model, processor = build_model(MODEL)
+
+print('Initializing role *planner*')
+model.load_adapter(nncore.join(MODEL, 'planner'), adapter_name='planner')
+
+print('Initializing role *verifier*')
+model.load_adapter(nncore.join(MODEL, 'verifier'), adapter_name='verifier')
+
+device = torch.device('cuda')


def seconds_to_hms(seconds):
@@ -89,7 +121,9 @@ def reset_components():


@spaces.GPU
-def main(video, prompt, role, temperature, max_new_tokens, model, processor, device):
+def main(video, prompt, role, temperature, max_new_tokens):
+    global model, processor, device
+
    history = []

    if not video:
@@ -525,40 +559,20 @@ def main(video, prompt, role, temperature, max_new_tokens, model, processor, dev
    yield history


-
-    if not nncore.is_dir(BASE_MODEL):
-        snapshot_download(BASE_MODEL_HF, local_dir=BASE_MODEL)
-
-    if not nncore.is_dir(MODEL):
-        snapshot_download(MODEL_HF, local_dir=MODEL)
-
-    print('Initializing role *grounder*')
-    model, processor = build_model(MODEL)
-
-    print('Initializing role *planner*')
-    model.load_adapter(nncore.join(MODEL, 'planner'), adapter_name='planner')
-
-    print('Initializing role *verifier*')
-    model.load_adapter(nncore.join(MODEL, 'verifier'), adapter_name='verifier')
-
-    device = torch.device('cuda')
-
-    main = partial(main, model=model, processor=processor, device=device)
-
-    path = os.path.dirname(os.path.realpath(__file__))
-
+def build_demo():
    chat = gr.Chatbot(
        type='messages',
        height='70vh',
-        avatar_images=[f'{
+        avatar_images=[f'{PATH}/assets/user.png', f'{PATH}/assets/bot.png'],
        placeholder='A conversation with VideoMind',
        label='VideoMind')

    prompt = gr.Textbox(label='Text Prompt', placeholder='Ask a question about the video...')

-    with gr.Blocks(title=TITLE
-    gr.Markdown(
-    gr.
+    with gr.Blocks(title=TITLE) as demo:
+        gr.Markdown(LOGO)
+        gr.HTML(BADGE)
+        gr.Markdown(DISC)

        with gr.Row():
            with gr.Column(scale=3):
@@ -592,7 +606,11 @@ if __name__ == '__main__':
                    label='Max Output Tokens',
                    info='The maximum number of output tokens for each role (Default: 256)')

-
+                with gr.Group():
+                    prompt.render()
+
+                with gr.Accordion(label='Examples', open=False):
+                    gr.Examples(examples=EXAMPLES, inputs=[video, prompt, role], examples_per_page=3)

                with gr.Row():
                    random_btn = gr.Button(value='🔮 Random')
@@ -606,9 +624,16 @@ if __name__ == '__main__':
                submit_ctx = submit_ctx.then(main, [video, prompt, role, temperature, max_new_tokens], chat)
                submit_ctx.then(enable_btns, None, [random_btn, reset_btn, submit_btn])

-                gr.Markdown('##### Need
+                gr.Markdown('##### Need example data? Explore examples tab or click 🔮 Random to sample one!')

            with gr.Column(scale=5):
                chat.render()

-
+    return demo
+
+
+if __name__ == '__main__':
+    demo = build_demo()
+
+    demo.queue()
+    demo.launch(server_name='0.0.0.0', allowed_paths=[f'{PATH}/assets', f'{PATH}/examples'])
examples/10309844035.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8996ff134787d6b769c2491b9079a02c05953465ad770f07a8d9138e2668d24f
+size 4041678

examples/13887487955.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5fecab1076ee42b3804718f9f64bef06cbfafd6995ad5f5ee42ba6354721429
+size 5544739

examples/4167294363.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d0e0a4a381836f68e16a816d87f241fed3e31ea321f544b921743d6c1c50666
+size 6611151

examples/4742652230.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8733ab4b0716d13ea7a79fc4ddacaf9eede567db364f0ecddfa4582c2f237f82
+size 2200304

examples/4766274786.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afa38a9ce9e89f934293214d79755c89159664223b3ca366813fd5fe524ed013
+size 3395545

examples/5012237466.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd1929aa93d037f809f402e9801047125dc9fe8060301e69ded9ba1f2d785cc8
+size 4822293

examples/5188348585.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b225f448a546ba2f65958f18c6731a6dde9b1f437014e90036b22eb40e9ad0a5
+size 5051675

examples/9383140374.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30b6b3eb43f711bef194150d473a59850ff5d7fec0f5cc30e7526aa9e382303f
+size 2518081

examples/DTInxNfWXVc_210.0_360.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a09eee0dc404688731fb768c120d3519605f2343376b9bd727a71b91379fd9a9
+size 4999970

examples/RoripwjYFp8_210.0_360.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b39b15158dc20c0bc6f1758a9239c8f3eed20ba4a90953338eec2246fa8f1f0
+size 9287252

examples/UFWQKrcbhjI_360.0_510.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8669153d9ffac4b5534c20fab8d795347f5babe588da9b8330e049d623ebb443
+size 14510618

examples/Z3-IZ3HAmIA_60.0_210.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b3a342993ee61efc5f3b859cd9c1e0d360b3331eed9deb8466891e4bcacc554
+size 14397799

examples/h6QKDqomIPk_210.0_360.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:103820de2b8a1a3935b39ed80d91cd08e546e5617310b3d1bb3dadb06b2ffb95
+size 13485144

examples/pA6Z-qYhSNg_60.0_210.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c84660fd4ebd8c23a2a7364174b1e819fec8b0e1cb8b9d9cd86f9e429cbdf66c
+size 8658509

examples/rrTIeJRVGjg_60.0_210.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efe6f48a49963bd4880ef5065840e05dd25e2aa975870140bcdaf4220bbd2827
+size 11410412

examples/yId2wIocTys_210.0_360.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:447fcb1fd1f94ed6a88d56dd0f6f859646cb8c58ed8e3b7a82f374e2cfee1646
+size 14769130
requirements.txt
CHANGED
@@ -1,10 +1,8 @@
accelerate==1.2.1
decord==0.6.0
-gradio==4.44.1
nncore==0.4.5
pandas==2.2.3
peft==0.14.0
-pydantic==2.10.6
pysrt==1.1.2
scikit-image==0.25.0
scikit-learn==1.6.1
@@ -13,6 +11,12 @@ spaces==0.34.0
termplotlib==0.3.9
triton==3.0.0

+# gradio 5.16.0 to 5.23.1 have wrong horizontal margins
+gradio==5.15.0
+
+# https://github.com/gradio-app/gradio/issues/10662
+pydantic==2.10.6
+
# our codebase contains necessary patches for 4.45.2
transformers==4.45.2
setup.cfg
CHANGED
@@ -7,7 +7,7 @@ split_before_expression_after_opening_paren = true
[isort]
line_length = 120
multi_line_output = 0
-known_third_party = decord,deepspeed,gradio,huggingface_hub,nncore,numpy,pandas,peft,PIL,pysrt,safetensors,tabulate,termplotlib,torch,torchvision,transformers
+known_third_party = decord,deepspeed,gradio,huggingface_hub,nncore,numpy,pandas,peft,PIL,pysrt,safetensors,spaces,tabulate,termplotlib,torch,torchvision,transformers
no_lines_before = STDLIB,LOCALFOLDER
default_section = FIRSTPARTY