yeliudev committed on
Commit f3b9e64 · verified · 1 Parent(s): a25744a

Update app.py

Files changed (1)
  1. app.py +8 -9
app.py CHANGED

@@ -48,11 +48,14 @@ BADGE = """
   <a href="https://github.com/yeliudev/VideoMind/blob/main/README.md" target="_blank">
     <img src="https://img.shields.io/badge/License-BSD--3--Clause-purple">
   </a>
+  <a href="https://github.com/yeliudev/VideoMind" target="_blank">
+    <img src="https://img.shields.io/github/stars/yeliudev/VideoMind">
+  </a>
 </div>
 """
 
 LOGO = '<p align="center"><img width="350" src="https://raw.githubusercontent.com/yeliudev/VideoMind/refs/heads/main/.github/logo.png"></p>'
-DISC = '**VideoMind** is a multi-modal agent framework that enhances video reasoning by emulating *human-like* processes, such as *breaking down tasks*, *localizing and verifying moments*, and *synthesizing answers*. Our method addresses the unique challenges of temporal-grounded reasoning in a progressive strategy. This demo showcases how VideoMind-2B handles video-language tasks. Please open an <a href="https://github.com/yeliudev/VideoMind/issues/new" target="_blank">issue</a> if you meet any problems.' # noqa
+DISC = '**VideoMind** is a multi-modal agent framework that enhances video reasoning by emulating *human-like* processes, such as *breaking down tasks*, *localizing and verifying moments*, and *synthesizing answers*. This demo showcases how VideoMind-2B handles video-language tasks. Please open an <a href="https://github.com/yeliudev/VideoMind/issues/new" target="_blank">issue</a> if you meet any problems.' # noqa
 
 # yapf:disable
 EXAMPLES = [

@@ -562,7 +565,7 @@ def main(video, prompt, role, temperature, max_new_tokens):
 def build_demo():
     chat = gr.Chatbot(
         type='messages',
-        height='70vh',
+        height='70em',
         avatar_images=[f'{PATH}/assets/user.png', f'{PATH}/assets/bot.png'],
         placeholder='A conversation with VideoMind',
         label='VideoMind')

@@ -570,7 +573,7 @@ def build_demo():
     prompt = gr.Textbox(label='Text Prompt', placeholder='Ask a question about the video...')
 
     with gr.Blocks(title=TITLE) as demo:
-        gr.Markdown(LOGO)
+        gr.HTML(LOGO)
         gr.HTML(BADGE)
         gr.Markdown(DISC)
 

@@ -606,11 +609,7 @@ def build_demo():
                     label='Max Output Tokens',
                     info='The maximum number of output tokens for each role (Default: 256)')
 
-                with gr.Group():
-                    prompt.render()
-
-                with gr.Accordion(label='Examples', open=False):
-                    gr.Examples(examples=EXAMPLES, inputs=[video, prompt, role], examples_per_page=3)
+                prompt.render()
 
                 with gr.Row():
                     random_btn = gr.Button(value='🔮 Random')

@@ -624,7 +623,7 @@
                 submit_ctx = submit_ctx.then(main, [video, prompt, role, temperature, max_new_tokens], chat)
                 submit_ctx.then(enable_btns, None, [random_btn, reset_btn, submit_btn])
 
-                gr.Markdown('Need example data? Explore examples tab or click 🔮 Random to sample one!')
+                gr.Examples(examples=EXAMPLES, inputs=[video, prompt, role], examples_per_page=3)
 
             with gr.Column(scale=5):
                 chat.render()
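Taken together, the commit simplifies the demo layout: the prompt textbox is no longer wrapped in a gr.Group, the examples move out of the collapsed Accordion and are rendered directly at the bottom of the controls column, the chatbot height changes from '70vh' to '70em', the logo is rendered with gr.HTML (it is an HTML snippet, not Markdown), and a GitHub-stars badge is added. The sketch below is a minimal, self-contained approximation of the resulting layout pattern, not the real app: it assumes a recent Gradio release, and the answer function, the example entry, and the role choices are placeholders standing in for main, EXAMPLES, and the actual role options in app.py.

import gradio as gr

# Placeholder data standing in for the real EXAMPLES list in app.py.
EXAMPLES = [
    [None, 'What happens after the person opens the door?', 'auto'],
]


def answer(video, prompt, role):
    # Placeholder for the real inference entry point (`main` in app.py).
    # With type='messages', the Chatbot expects a list of {'role', 'content'} dicts.
    return [{'role': 'assistant', 'content': f'[{role}] You asked: {prompt}'}]


def build_demo():
    # Components are created up front so they can be placed later with .render().
    chat = gr.Chatbot(type='messages', height='70em', label='VideoMind')
    prompt = gr.Textbox(label='Text Prompt', placeholder='Ask a question about the video...')

    with gr.Blocks(title='VideoMind') as demo:
        with gr.Row():
            with gr.Column(scale=3):
                video = gr.Video(label='Video')
                role = gr.Radio(['auto'], value='auto', label='Role')  # placeholder choices

                prompt.render()  # previously wrapped in `with gr.Group():`

                with gr.Row():
                    submit_btn = gr.Button(value='Submit')

                submit_btn.click(answer, [video, prompt, role], chat)

                # Examples are now rendered directly at the bottom of the column,
                # instead of inside `gr.Accordion(label='Examples', open=False)`.
                gr.Examples(examples=EXAMPLES, inputs=[video, prompt, role], examples_per_page=3)

            with gr.Column(scale=5):
                chat.render()

    return demo


if __name__ == '__main__':
    build_demo().launch()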