awacke1 committed on
Commit
4a402b9
·
verified ·
1 Parent(s): 7616943

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -27
app.py CHANGED
@@ -136,55 +136,68 @@ class VideoSearch:
136
  st.warning("Using example data embeddings")
137
  self.dataset = self.load_example_data()
138
 
139
- # Debug the embedding data
140
- st.write("Sample video_embed:", self.dataset['video_embed'].iloc[0])
141
- st.write("Sample description_embed:", self.dataset['description_embed'].iloc[0])
 
 
 
 
142
 
143
  # Convert string representations of embeddings back to numpy arrays
144
- def parse_embedding(embed_str):
145
  try:
146
- # Remove any string formatting artifacts
147
- cleaned_str = str(embed_str).strip()
148
- if cleaned_str.startswith('[') and cleaned_str.endswith(']'):
149
- # Split by comma and convert to floats
150
- values = [float(x.strip()) for x in cleaned_str[1:-1].split(',')]
151
- return values
152
- return []
 
 
 
 
153
  except Exception as e:
154
- st.error(f"Error parsing embedding: {e}")
155
- return []
 
156
 
157
- # Process embeddings
158
  video_embeds = []
159
  text_embeds = []
160
 
161
  for idx in range(len(self.dataset)):
162
  try:
163
- video_embed = parse_embedding(self.dataset['video_embed'].iloc[idx])
164
- desc_embed = parse_embedding(self.dataset['description_embed'].iloc[idx])
165
 
166
- if video_embed and desc_embed:
167
  video_embeds.append(video_embed)
168
  text_embeds.append(desc_embed)
 
 
169
  except Exception as e:
170
- st.error(f"Error processing row {idx}: {e}")
 
171
 
172
  if video_embeds and text_embeds:
173
- self.video_embeds = np.array(video_embeds)
174
- self.text_embeds = np.array(text_embeds)
175
- st.success(f"Successfully processed {len(video_embeds)} embeddings")
 
 
 
 
 
176
  else:
177
- st.warning("Falling back to random embeddings")
178
  num_rows = len(self.dataset)
179
  self.video_embeds = np.random.randn(num_rows, 384)
180
  self.text_embeds = np.random.randn(num_rows, 384)
181
 
182
- # Debug output
183
- st.write("Video embeddings shape:", self.video_embeds.shape)
184
- st.write("Text embeddings shape:", self.text_embeds.shape)
185
-
186
  except Exception as e:
187
- st.error(f"Error preparing features: {e}")
188
  import traceback
189
  st.write("Traceback:", traceback.format_exc())
190
  # Create random embeddings as fallback
 
136
  st.warning("Using example data embeddings")
137
  self.dataset = self.load_example_data()
138
 
139
+ # Debug: Show raw data types and first row
140
+ st.write("Data Types:", self.dataset.dtypes)
141
+ st.write("\nFirst row of embeddings:")
142
+ st.write("video_embed type:", type(self.dataset['video_embed'].iloc[0]))
143
+ st.write("video_embed content:", self.dataset['video_embed'].iloc[0])
144
+ st.write("\ndescription_embed type:", type(self.dataset['description_embed'].iloc[0]))
145
+ st.write("description_embed content:", self.dataset['description_embed'].iloc[0])
146
 
147
  # Convert string representations of embeddings back to numpy arrays
148
def safe_eval_list(s):
    """Parse one embedding cell into a flat list of floats.

    Accepts either a string such as ``"[0.1, 0.2]"`` / ``"[0.1 0.2]"``
    (square brackets are stripped; commas and any whitespace both act as
    separators) or an in-memory sequence (list, tuple, 1-D numpy array).

    Returns:
        A list of floats (empty when the string holds no numbers), or
        ``None`` when the value has an unsupported type or contains
        something that cannot be converted to ``float``. Failures are
        reported in the Streamlit UI rather than raised.
    """
    try:
        if isinstance(s, str):
            # Drop the brackets, then turn commas into spaces so that both
            # "[1.0, 2.0]" (Python list repr) and "[1.0 2.0]" (numpy repr)
            # tokenize the same way. A bare str.split() would leave "1.0,"
            # as a token and float() would reject it.
            cleaned = s.replace('[', '').replace(']', '').replace(',', ' ')
            return [float(token) for token in cleaned.split()]
        if isinstance(s, (list, tuple, np.ndarray)):
            return [float(x) for x in s]
        st.error(f"Unexpected type for embedding: {type(s)}")
        return None
    except (ValueError, TypeError) as e:
        # float() raises ValueError for malformed text and TypeError for
        # non-numeric elements (e.g. nested sequences).
        st.error(f"Error parsing embedding: {str(e)}")
        st.write("Problematic string:", s)
        return None
165
 
166
+ # Process embeddings with detailed error reporting
167
  video_embeds = []
168
  text_embeds = []
169
 
170
  for idx in range(len(self.dataset)):
171
  try:
172
+ video_embed = safe_eval_list(self.dataset['video_embed'].iloc[idx])
173
+ desc_embed = safe_eval_list(self.dataset['description_embed'].iloc[idx])
174
 
175
+ if video_embed is not None and desc_embed is not None:
176
  video_embeds.append(video_embed)
177
  text_embeds.append(desc_embed)
178
+ else:
179
+ st.warning(f"Skipping row {idx} due to parsing failure")
180
  except Exception as e:
181
+ st.error(f"Error processing row {idx}: {str(e)}")
182
+ st.write("Row data:", self.dataset.iloc[idx])
183
 
184
  if video_embeds and text_embeds:
185
+ try:
186
+ self.video_embeds = np.array(video_embeds)
187
+ self.text_embeds = np.array(text_embeds)
188
+ st.success(f"Successfully processed {len(video_embeds)} embeddings")
189
+ st.write("Video embeddings shape:", self.video_embeds.shape)
190
+ st.write("Text embeddings shape:", self.text_embeds.shape)
191
+ except Exception as e:
192
+ st.error(f"Error converting to numpy arrays: {str(e)}")
193
  else:
194
+ st.warning("No valid embeddings found, using random embeddings")
195
  num_rows = len(self.dataset)
196
  self.video_embeds = np.random.randn(num_rows, 384)
197
  self.text_embeds = np.random.randn(num_rows, 384)
198
 
 
 
 
 
199
  except Exception as e:
200
+ st.error(f"Error preparing features: {str(e)}")
201
  import traceback
202
  st.write("Traceback:", traceback.format_exc())
203
  # Create random embeddings as fallback