Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -48,11 +48,14 @@ BADGE = """
|
|
48 |
<a href="https://github.com/yeliudev/VideoMind/blob/main/README.md" target="_blank">
|
49 |
<img src="https://img.shields.io/badge/License-BSD--3--Clause-purple">
|
50 |
</a>
|
|
|
|
|
|
|
51 |
</div>
|
52 |
"""
|
53 |
|
54 |
LOGO = '<p align="center"><img width="350" src="https://raw.githubusercontent.com/yeliudev/VideoMind/refs/heads/main/.github/logo.png"></p>'
|
55 |
-
DISC = '**VideoMind** is a multi-modal agent framework that enhances video reasoning by emulating *human-like* processes, such as *breaking down tasks*, *localizing and verifying moments*, and *synthesizing answers*.
|
56 |
|
57 |
# yapf:disable
|
58 |
EXAMPLES = [
|
@@ -562,7 +565,7 @@ def main(video, prompt, role, temperature, max_new_tokens):
|
|
562 |
def build_demo():
|
563 |
chat = gr.Chatbot(
|
564 |
type='messages',
|
565 |
-
height='
|
566 |
avatar_images=[f'{PATH}/assets/user.png', f'{PATH}/assets/bot.png'],
|
567 |
placeholder='A conversation with VideoMind',
|
568 |
label='VideoMind')
|
@@ -570,7 +573,7 @@ def build_demo():
|
|
570 |
prompt = gr.Textbox(label='Text Prompt', placeholder='Ask a question about the video...')
|
571 |
|
572 |
with gr.Blocks(title=TITLE) as demo:
|
573 |
-
gr.
|
574 |
gr.HTML(BADGE)
|
575 |
gr.Markdown(DISC)
|
576 |
|
@@ -606,11 +609,7 @@ def build_demo():
|
|
606 |
label='Max Output Tokens',
|
607 |
info='The maximum number of output tokens for each role (Default: 256)')
|
608 |
|
609 |
-
|
610 |
-
prompt.render()
|
611 |
-
|
612 |
-
with gr.Accordion(label='Examples', open=False):
|
613 |
-
gr.Examples(examples=EXAMPLES, inputs=[video, prompt, role], examples_per_page=3)
|
614 |
|
615 |
with gr.Row():
|
616 |
random_btn = gr.Button(value='🔮 Random')
|
@@ -624,7 +623,7 @@ def build_demo():
|
|
624 |
submit_ctx = submit_ctx.then(main, [video, prompt, role, temperature, max_new_tokens], chat)
|
625 |
submit_ctx.then(enable_btns, None, [random_btn, reset_btn, submit_btn])
|
626 |
|
627 |
-
gr.
|
628 |
|
629 |
with gr.Column(scale=5):
|
630 |
chat.render()
|
|
|
48 |
<a href="https://github.com/yeliudev/VideoMind/blob/main/README.md" target="_blank">
|
49 |
<img src="https://img.shields.io/badge/License-BSD--3--Clause-purple">
|
50 |
</a>
|
51 |
+
<a href="https://github.com/yeliudev/VideoMind" target="_blank">
|
52 |
+
<img src="https://img.shields.io/github/stars/yeliudev/VideoMind">
|
53 |
+
</a>
|
54 |
</div>
|
55 |
"""
|
56 |
|
57 |
LOGO = '<p align="center"><img width="350" src="https://raw.githubusercontent.com/yeliudev/VideoMind/refs/heads/main/.github/logo.png"></p>'
|
58 |
+
DISC = '**VideoMind** is a multi-modal agent framework that enhances video reasoning by emulating *human-like* processes, such as *breaking down tasks*, *localizing and verifying moments*, and *synthesizing answers*. This demo showcases how VideoMind-2B handles video-language tasks. Please open an <a href="https://github.com/yeliudev/VideoMind/issues/new" target="_blank">issue</a> if you meet any problems.' # noqa
|
59 |
|
60 |
# yapf:disable
|
61 |
EXAMPLES = [
|
|
|
565 |
def build_demo():
|
566 |
chat = gr.Chatbot(
|
567 |
type='messages',
|
568 |
+
height='70em',
|
569 |
avatar_images=[f'{PATH}/assets/user.png', f'{PATH}/assets/bot.png'],
|
570 |
placeholder='A conversation with VideoMind',
|
571 |
label='VideoMind')
|
|
|
573 |
prompt = gr.Textbox(label='Text Prompt', placeholder='Ask a question about the video...')
|
574 |
|
575 |
with gr.Blocks(title=TITLE) as demo:
|
576 |
+
gr.HTML(LOGO)
|
577 |
gr.HTML(BADGE)
|
578 |
gr.Markdown(DISC)
|
579 |
|
|
|
609 |
label='Max Output Tokens',
|
610 |
info='The maximum number of output tokens for each role (Default: 256)')
|
611 |
|
612 |
+
prompt.render()
|
|
|
|
|
|
|
|
|
613 |
|
614 |
with gr.Row():
|
615 |
random_btn = gr.Button(value='🔮 Random')
|
|
|
623 |
submit_ctx = submit_ctx.then(main, [video, prompt, role, temperature, max_new_tokens], chat)
|
624 |
submit_ctx.then(enable_btns, None, [random_btn, reset_btn, submit_btn])
|
625 |
|
626 |
+
gr.Examples(examples=EXAMPLES, inputs=[video, prompt, role], examples_per_page=3)
|
627 |
|
628 |
with gr.Column(scale=5):
|
629 |
chat.render()
|