PierreBrunelle committed on
Commit
2be8e0f
·
verified ·
1 Parent(s): f895f15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -159
app.py CHANGED
@@ -2,185 +2,116 @@ import gradio as gr
2
  import pixeltable as pxt
3
  from pixeltable.functions.huggingface import clip
4
  from pixeltable.iterators import FrameIterator
 
5
  import os
6
- import logging
7
-
8
- # Set up logging
9
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
10
- logger = logging.getLogger(__name__)
11
-
12
- # Initialize Pixeltable directory constants
13
- PROJECT_DIR = 'video_search'
14
- VIDEOS_TABLE = f'{PROJECT_DIR}.videos'
15
- FRAMES_VIEW = f'{PROJECT_DIR}.frames'
16
 
17
  # Process video and create index
18
  def process_video(video_file, progress=gr.Progress()):
19
- if video_file is None:
20
- return "Please upload a video file first."
21
-
22
- try:
23
- progress(0, desc="Initializing...")
24
- logger.info(f"Processing video: {video_file.name}")
25
 
26
- # Pixeltable setup
27
- pxt.drop_dir(PROJECT_DIR, force=True)
28
- pxt.create_dir(PROJECT_DIR)
29
 
30
- # Create video table
31
- video_table = pxt.create_table(VIDEOS_TABLE, {'video': pxt.Video})
32
 
33
- # Create frames view
34
- frames_view = pxt.create_view(
35
- FRAMES_VIEW,
36
- video_table,
37
- iterator=FrameIterator.create(video=video_table.video, fps=1)
38
- )
39
-
40
- progress(0.2, desc="Inserting video...")
41
- video_table.insert([{'video': video_file.name}])
42
-
43
- progress(0.4, desc="Creating embedding index...")
44
- # Use the CLIP model for both text and image embedding
45
- frames_view.add_embedding_index(
46
- 'frame',
47
- embedding=clip.using(model_id='openai/clip-vit-base-patch32')
48
- )
49
 
50
- progress(1.0, desc="Processing complete")
51
- return "✅ Video processed successfully! You can now search for specific moments using text or images."
52
 
53
- except Exception as e:
54
- logger.error(f"Error processing video: {str(e)}")
55
- return f"Error processing video: {str(e)}"
 
 
 
 
 
 
56
 
57
  # Perform similarity search
58
  def similarity_search(query, search_type, num_results, progress=gr.Progress()):
59
- try:
60
- if not query:
61
- return []
62
-
63
- frames_view = pxt.get_table(FRAMES_VIEW)
64
- if frames_view is None:
65
- return []
66
-
67
- progress(0.5, desc="Performing search...")
68
  sim = frames_view.frame.similarity(query)
69
-
70
- results = frames_view.order_by(sim, asc=False).limit(num_results).select(
71
- frames_view.frame,
72
- similarity=sim
73
- ).collect()
74
-
75
- progress(1.0, desc="Search complete")
76
- return [row['frame'] for row in results]
77
 
78
- except Exception as e:
79
- logger.error(f"Error during search: {str(e)}")
80
- return []
81
-
82
- # Create CSS for better styling
83
- css = """
84
- .container {
85
- max-width: 1200px;
86
- margin: 0 auto;
87
- }
88
- .header {
89
- display: flex;
90
- align-items: center;
91
- margin-bottom: 20px;
92
- }
93
- .header img {
94
- max-width: 120px;
95
- margin-right: 20px;
96
- }
97
- .step-header {
98
- background-color: #f5f5f5;
99
- padding: 10px;
100
- border-radius: 5px;
101
- margin-bottom: 15px;
102
- }
103
- .examples-section {
104
- margin-top: 30px;
105
- }
106
- """
107
-
108
  # Gradio interface
109
- with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
110
- gr.HTML(
111
  """
112
- <div class="header">
113
- <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" />
114
- <div>
115
- <h1>Video Frame Search with AI</h1>
116
- <p>Search through video content using natural language or images powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a>.</p>
117
- </div>
118
  </div>
119
  """
120
  )
 
 
 
 
 
 
 
 
121
 
122
  with gr.Row():
123
  with gr.Column(scale=1):
124
- gr.HTML('<div class="step-header"><h3>1. Insert video</h3></div>')
 
 
 
 
125
 
126
- video_file = gr.File(label="Upload Video", file_types=["video"])
127
- process_button = gr.Button("Process Video", variant="primary")
128
  process_output = gr.Textbox(label="Status", lines=2)
129
 
130
- gr.HTML('<div class="step-header"><h3>2. Search video frames</h3></div>')
 
 
 
131
 
132
- search_type = gr.Radio(
133
- ["Text", "Image"],
134
- label="Search Type",
135
- value="Text",
136
- info="Choose whether to search using text or an image"
137
- )
138
- text_input = gr.Textbox(
139
- label="Text Query",
140
- placeholder="Describe what you're looking for...",
141
- info="Example: 'person walking' or 'red car'"
142
- )
143
- image_input = gr.Image(
144
- label="Image Query",
145
- type="pil",
146
- visible=False,
147
- info="Upload an image to find similar frames"
148
- )
149
- num_results = gr.Slider(
150
- minimum=1,
151
- maximum=20,
152
- value=5,
153
- step=1,
154
- label="Number of Results",
155
- info="How many matching frames to display"
156
- )
157
- search_button = gr.Button("Search", variant="primary")
158
 
159
  with gr.Column(scale=2):
160
- gr.HTML('<div class="step-header"><h3>3. Visualize results</h3></div>')
 
 
 
 
161
 
162
- results_gallery = gr.Gallery(
163
- label="Search Results",
164
- columns=3,
165
- allow_preview=True,
166
- object_fit="contain"
167
- )
168
 
169
- with gr.Accordion("Example Videos", open=False):
170
- gr.Markdown("Click one of the examples below to get started")
171
- gr.Examples(
172
- examples=[
173
- ["bangkok.mp4"],
174
- ["lotr.mp4"],
175
- ["mi.mp4"],
176
- ],
177
- inputs=[video_file],
178
- outputs=[process_output],
179
- fn=process_video,
180
- cache_examples=True
181
- )
182
 
183
- # Handle UI interactions
184
  def update_search_input(choice):
185
  return gr.update(visible=choice=="Text"), gr.update(visible=choice=="Image")
186
 
@@ -194,9 +125,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
194
 
195
  def perform_search(search_type, text_query, image_query, num_results):
196
  query = text_query if search_type == "Text" else image_query
197
- if query is None or (isinstance(query, str) and query.strip() == ""):
198
- return gr.Gallery(label="Please enter a valid search query")
199
-
200
  return similarity_search(query, search_type, num_results)
201
 
202
  search_button.click(
@@ -205,13 +133,5 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
205
  outputs=[results_gallery]
206
  )
207
 
208
- # Add keyboard shortcuts
209
- search_type.change(lambda: None, None, None, _js="() => {document.activeElement.blur();}")
210
- text_input.submit(
211
- perform_search,
212
- inputs=[search_type, text_input, image_input, num_results],
213
- outputs=[results_gallery]
214
- )
215
-
216
  if __name__ == "__main__":
217
  demo.launch()
 
2
  import pixeltable as pxt
3
  from pixeltable.functions.huggingface import clip
4
  from pixeltable.iterators import FrameIterator
5
+ import PIL.Image
6
  import os
 
 
 
 
 
 
 
 
 
 
7
 
8
  # Process video and create index
9
  def process_video(video_file, progress=gr.Progress()):
10
+ progress(0, desc="Initializing...")
 
 
 
 
 
11
 
12
+ # Pixeltable setup
13
+ pxt.drop_dir('video_search', force=True)
14
+ pxt.create_dir('video_search')
15
 
16
+ # Update type declaration to use simpler syntax
17
+ video_table = pxt.create_table('video_search.videos', {'video': pxt.Video})
18
 
19
+ frames_view = pxt.create_view(
20
+ 'video_search.frames',
21
+ video_table,
22
+ iterator=FrameIterator.create(video=video_table.video, fps=1)
23
+ )
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ progress(0.2, desc="Inserting video...")
26
+ video_table.insert([{'video': video_file.name}])
27
 
28
+ progress(0.4, desc="Creating embedding index...")
29
+ # Updated embedding pattern using .using()
30
+ frames_view.add_embedding_index(
31
+ 'frame',
32
+ embedding=clip.using(model_id='openai/clip-vit-base-patch32')
33
+ )
34
+
35
+ progress(1.0, desc="Processing complete")
36
+ return "Good news! Your video has been processed. Easily find the moments you need by searching with text or images."
37
 
38
  # Perform similarity search
39
  def similarity_search(query, search_type, num_results, progress=gr.Progress()):
40
+ frames_view = pxt.get_table('video_search.frames')
41
+
42
+ progress(0.5, desc="Performing search...")
43
+ if search_type == "Text":
44
+ sim = frames_view.frame.similarity(query)
45
+ else: # Image search
 
 
 
46
  sim = frames_view.frame.similarity(query)
 
 
 
 
 
 
 
 
47
 
48
+ results = frames_view.order_by(sim, asc=False).limit(num_results).select(frames_view.frame, sim=sim).collect()
49
+
50
+ progress(1.0, desc="Search complete")
51
+ return [row['frame'] for row in results]
52
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # Gradio interface
54
+ with gr.Blocks(theme=gr.themes.Base()) as demo:
55
+ gr.Markdown(
56
  """
57
+ <div style="margin-bottom: 20px;">
58
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 150px;" />
59
+ <h2>Text and Image similarity search on video frames with embedding indexes</h2>
 
 
 
60
  </div>
61
  """
62
  )
63
+ gr.HTML(
64
+ """
65
+ <p>
66
+ <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
67
+ </p>
68
+ """
69
+ )
70
+
71
 
72
  with gr.Row():
73
  with gr.Column(scale=1):
74
+
75
+ gr.Markdown(
76
+ """
77
+ <h3>1. Insert video</h3>
78
+ """)
79
 
80
+ video_file = gr.File(label="Upload Video")
81
+ process_button = gr.Button("Process Video")
82
  process_output = gr.Textbox(label="Status", lines=2)
83
 
84
+ gr.Markdown(
85
+ """
86
+ <h3>2. Search video frames</h3>
87
+ """)
88
 
89
+ search_type = gr.Radio(["Text", "Image"], label="Search Type", value="Text")
90
+ text_input = gr.Textbox(label="Text Query")
91
+ image_input = gr.Image(label="Image Query", type="pil", visible=False)
92
+ num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
93
+ search_button = gr.Button("Search")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  with gr.Column(scale=2):
96
+
97
+ gr.Markdown(
98
+ """
99
+ <h3>3. Visualize results</h3>
100
+ """)
101
 
102
+ results_gallery = gr.Gallery(label="Search Results", columns=3)
 
 
 
 
 
103
 
104
+ gr.Examples(
105
+ examples=[
106
+ ["bangkok.mp4"],
107
+ ["lotr.mp4"],
108
+ ["mi.mp4"],
109
+ ],
110
+ label="Click one of the examples below to get started",
111
+ inputs=[video_file],
112
+ fn=process_video
113
+ )
 
 
 
114
 
 
115
  def update_search_input(choice):
116
  return gr.update(visible=choice=="Text"), gr.update(visible=choice=="Image")
117
 
 
125
 
126
  def perform_search(search_type, text_query, image_query, num_results):
127
  query = text_query if search_type == "Text" else image_query
 
 
 
128
  return similarity_search(query, search_type, num_results)
129
 
130
  search_button.click(
 
133
  outputs=[results_gallery]
134
  )
135
 
 
 
 
 
 
 
 
 
136
  if __name__ == "__main__":
137
  demo.launch()