gina9726 committed on
Commit
81071ed
·
verified ·
1 Parent(s): 5f18375

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -8
app.py CHANGED
@@ -23,10 +23,8 @@ def load_v2t_samples(data_root):
23
  def load_t2v_samples(data_root):
24
  sample_text = ['cut the sausage', 'stir vegetables into salmon', 'rinse cutting board']
25
  idx2sid = {0: 2119, 1: 1730, 2: 1276}
26
-
27
  return sample_text, idx2sid
28
 
29
-
30
  def format_pred(pred, gt):
31
  tp = '[color=green]{}[/color]'
32
  fp = '[color=red]{}[/color]'
@@ -57,10 +55,10 @@ def main():
57
 
58
  def predict_t2v(idx):
59
  sid = idx2sid_t2v[idx]
60
- zeroshot_video, gt_video = lavila.predict_t2v(idx, sid)
61
  egovpa_video, gt_video = egovpa.predict_t2v(idx, sid)
 
62
 
63
- return gt_video, zeroshot_video, egovpa_video
64
 
65
  with gr.Blocks() as demo:
66
  with gr.Tab("Video-to-text retrieval"):
@@ -97,12 +95,12 @@ def main():
97
  text = gr.Text(label="text query")
98
  with gr.Column():
99
  idx = gr.Number(label="Idx", visible=False)
100
- zeroshot = gr.Textbox(label="LaViLa (zero-shot) prediction")
101
  #zeroshot = gr.Gallery(label="LaViLa (zero-shot) prediction", columns=[3], rows=[1], object_fit="contain", height="auto")
102
- ours = gr.Textbox(label="Ego-VPA prediction")
103
- #ours = gr.Gallery(label="Ego-VPA prediction", columns=[3], rows=[1], object_fit="contain", height="auto")
104
  btn = gr.Button("Predict", variant="primary")
105
- btn.click(predict_t2v, inputs=[idx], outputs=[label, zeroshot, ours])
106
  gr.Examples(examples=[[i, x] for i, x in enumerate(t2v_samples)], inputs=[idx, text])
107
 
108
 
 
23
  def load_t2v_samples(data_root):
24
  sample_text = ['cut the sausage', 'stir vegetables into salmon', 'rinse cutting board']
25
  idx2sid = {0: 2119, 1: 1730, 2: 1276}
 
26
  return sample_text, idx2sid
27
 
 
28
  def format_pred(pred, gt):
29
  tp = '[color=green]{}[/color]'
30
  fp = '[color=red]{}[/color]'
 
55
 
56
  def predict_t2v(idx):
57
  sid = idx2sid_t2v[idx]
 
58
  egovpa_video, gt_video = egovpa.predict_t2v(idx, sid)
59
+ egovpa_video = [f'{data_root}/video/gif/{x}.gif' for x in ego_video]
60
 
61
+ return egovpa_video
62
 
63
  with gr.Blocks() as demo:
64
  with gr.Tab("Video-to-text retrieval"):
 
95
  text = gr.Text(label="text query")
96
  with gr.Column():
97
  idx = gr.Number(label="Idx", visible=False)
98
+ #zeroshot = gr.Textbox(label="LaViLa (zero-shot) prediction")
99
  #zeroshot = gr.Gallery(label="LaViLa (zero-shot) prediction", columns=[3], rows=[1], object_fit="contain", height="auto")
100
+ #ours = gr.Textbox(label="Ego-VPA prediction")
101
+ ours = gr.Gallery(label="Ego-VPA prediction", columns=[3], rows=[1], object_fit="contain", height="auto")
102
  btn = gr.Button("Predict", variant="primary")
103
+ btn.click(predict_t2v, inputs=[idx], outputs=[ours])
104
  gr.Examples(examples=[[i, x] for i, x in enumerate(t2v_samples)], inputs=[idx, text])
105
 
106