awacke1 committed on
Commit
7616943
·
verified ·
1 Parent(s): 8630bc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -7
app.py CHANGED
@@ -136,19 +136,57 @@ class VideoSearch:
136
  st.warning("Using example data embeddings")
137
  self.dataset = self.load_example_data()
138
 
 
 
 
 
139
  # Convert string representations of embeddings back to numpy arrays
140
- try:
141
- self.video_embeds = np.array([json.loads(e) if isinstance(e, str) else e
142
- for e in self.dataset.video_embed])
143
- self.text_embeds = np.array([json.loads(e) if isinstance(e, str) else e
144
- for e in self.dataset.description_embed])
145
- except Exception as e:
146
- st.error(f"Error converting embeddings: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  num_rows = len(self.dataset)
148
  self.video_embeds = np.random.randn(num_rows, 384)
149
  self.text_embeds = np.random.randn(num_rows, 384)
 
 
 
 
 
150
  except Exception as e:
151
  st.error(f"Error preparing features: {e}")
 
 
152
  # Create random embeddings as fallback
153
  num_rows = len(self.dataset)
154
  self.video_embeds = np.random.randn(num_rows, 384)
 
136
  st.warning("Using example data embeddings")
137
  self.dataset = self.load_example_data()
138
 
139
+ # Debug the embedding data
140
+ st.write("Sample video_embed:", self.dataset['video_embed'].iloc[0])
141
+ st.write("Sample description_embed:", self.dataset['description_embed'].iloc[0])
142
+
143
  # Convert string representations of embeddings back to numpy arrays
144
def parse_embedding(embed_str):
    """Convert one stored embedding into a flat list of floats.

    Args:
        embed_str: Either the textual form of an embedding, e.g.
            ``"[0.1, 0.2, 0.3]"``, or an already-decoded sequence of numbers
            (list, tuple, array-like).

    Returns:
        A list of floats. An empty list is returned when the value is not a
        bracketed list, is an empty list, or cannot be parsed; callers treat
        an empty list as "skip this row".
    """
    try:
        # Already-decoded sequences are converted directly. Round-tripping
        # them through str() is lossy: an ndarray's string form has no commas
        # and is abbreviated with "..." for long arrays.
        if not isinstance(embed_str, (str, bytes)):
            try:
                return [float(value) for value in embed_str]
            except TypeError:
                # Not iterable (e.g. None / NaN) or not flat: fall through to
                # the text path, which returns [] or reports the error.
                pass

        # Remove any string formatting artifacts
        cleaned_str = str(embed_str).strip()
        if not (cleaned_str.startswith('[') and cleaned_str.endswith(']')):
            return []

        inner = cleaned_str[1:-1].strip()
        if not inner:
            # "[]" is a valid but empty embedding, not a parse error.
            return []

        # Split by comma and convert to floats
        return [float(x.strip()) for x in inner.split(',')]
    except (ValueError, TypeError) as e:
        st.error(f"Error parsing embedding: {e}")
        return []
156
+
157
+ # Process embeddings
158
+ video_embeds = []
159
+ text_embeds = []
160
+
161
+ for idx in range(len(self.dataset)):
162
+ try:
163
+ video_embed = parse_embedding(self.dataset['video_embed'].iloc[idx])
164
+ desc_embed = parse_embedding(self.dataset['description_embed'].iloc[idx])
165
+
166
+ if video_embed and desc_embed:
167
+ video_embeds.append(video_embed)
168
+ text_embeds.append(desc_embed)
169
+ except Exception as e:
170
+ st.error(f"Error processing row {idx}: {e}")
171
+
172
+ if video_embeds and text_embeds:
173
+ self.video_embeds = np.array(video_embeds)
174
+ self.text_embeds = np.array(text_embeds)
175
+ st.success(f"Successfully processed {len(video_embeds)} embeddings")
176
+ else:
177
+ st.warning("Falling back to random embeddings")
178
  num_rows = len(self.dataset)
179
  self.video_embeds = np.random.randn(num_rows, 384)
180
  self.text_embeds = np.random.randn(num_rows, 384)
181
+
182
+ # Debug output
183
+ st.write("Video embeddings shape:", self.video_embeds.shape)
184
+ st.write("Text embeddings shape:", self.text_embeds.shape)
185
+
186
  except Exception as e:
187
  st.error(f"Error preparing features: {e}")
188
+ import traceback
189
+ st.write("Traceback:", traceback.format_exc())
190
  # Create random embeddings as fallback
191
  num_rows = len(self.dataset)
192
  self.video_embeds = np.random.randn(num_rows, 384)