broadfield-dev committed on
Commit
cd69d2a
·
verified ·
1 Parent(s): 98fe021

Update app.py: remove trust_remote_code=True from the dataset split and config-info lookups

Browse files
Files changed (1) hide show
  1. app.py +6 -10
app.py CHANGED
@@ -45,7 +45,8 @@ def get_dataset_splits(dataset_id: str):
45
  if not dataset_id:
46
  return gr.update(choices=[], value=None, interactive=False)
47
  try:
48
- splits = get_dataset_split_names(dataset_id, trust_remote_code=True)
 
49
  return gr.update(choices=splits, value=splits[0] if splits else None, interactive=True)
50
  except Exception as e:
51
  gr.Warning(f"Could not fetch splits for dataset '{dataset_id}'. Error: {e}")
@@ -56,16 +57,12 @@ def get_split_columns(dataset_id: str):
56
  if not dataset_id:
57
  return gr.update(choices=[], value=None, interactive=False)
58
  try:
59
- # --- THIS IS THE ROBUST FIX ---
60
- # Use get_dataset_config_info to get schema without loading data.
61
- # This is the official and most reliable way.
62
- info = get_dataset_config_info(dataset_id, trust_remote_code=True)
63
  features = info.features
64
 
65
- # The user is right, we should show ALL columns.
66
  columns = list(features.keys())
67
 
68
- # We can still be helpful by guessing the best default.
69
  preferred_cols = ['text', 'content', 'instruction', 'question', 'document', 'prompt']
70
  best_col = next((col for col in preferred_cols if col in columns), columns[0] if columns else None)
71
 
@@ -93,6 +90,8 @@ def generate_atlas(
93
 
94
  progress(0, desc=f"Loading dataset '{dataset_name}' [{split}]...")
95
  try:
 
 
96
  dataset = load_dataset(dataset_name, split=split, trust_remote_code=True)
97
  df = dataset.to_pandas()
98
  except Exception as e:
@@ -170,10 +169,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Embedding Atlas Explorer") as app:
170
  # --- Chained Event Listeners for Dynamic UI ---
171
  hf_user_input.submit(fn=get_user_datasets, inputs=hf_user_input, outputs=dataset_input)
172
 
173
- # When a dataset is selected, get its splits.
174
  dataset_input.change(fn=get_dataset_splits, inputs=dataset_input, outputs=split_input)
175
-
176
- # When a dataset is selected, ALSO get its columns. The split doesn't matter for column schema.
177
  dataset_input.change(fn=get_split_columns, inputs=dataset_input, outputs=text_column_input)
178
 
179
  # --- Button Click Event ---
 
45
  if not dataset_id:
46
  return gr.update(choices=[], value=None, interactive=False)
47
  try:
48
+ # --- FIX: Removed trust_remote_code=True ---
49
+ splits = get_dataset_split_names(dataset_id)
50
  return gr.update(choices=splits, value=splits[0] if splits else None, interactive=True)
51
  except Exception as e:
52
  gr.Warning(f"Could not fetch splits for dataset '{dataset_id}'. Error: {e}")
 
57
  if not dataset_id:
58
  return gr.update(choices=[], value=None, interactive=False)
59
  try:
60
+ # --- FIX: Removed trust_remote_code=True ---
61
+ info = get_dataset_config_info(dataset_id)
 
 
62
  features = info.features
63
 
 
64
  columns = list(features.keys())
65
 
 
66
  preferred_cols = ['text', 'content', 'instruction', 'question', 'document', 'prompt']
67
  best_col = next((col for col in preferred_cols if col in columns), columns[0] if columns else None)
68
 
 
90
 
91
  progress(0, desc=f"Loading dataset '{dataset_name}' [{split}]...")
92
  try:
93
+ # Here, trust_remote_code can be useful if the dataset actually needs it.
94
+ # It's less likely to crash here than in the metadata functions.
95
  dataset = load_dataset(dataset_name, split=split, trust_remote_code=True)
96
  df = dataset.to_pandas()
97
  except Exception as e:
 
169
  # --- Chained Event Listeners for Dynamic UI ---
170
  hf_user_input.submit(fn=get_user_datasets, inputs=hf_user_input, outputs=dataset_input)
171
 
 
172
  dataset_input.change(fn=get_dataset_splits, inputs=dataset_input, outputs=split_input)
 
 
173
  dataset_input.change(fn=get_split_columns, inputs=dataset_input, outputs=text_column_input)
174
 
175
  # --- Button Click Event ---