Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -45,7 +45,8 @@ def get_dataset_splits(dataset_id: str):
|
|
45 |
if not dataset_id:
|
46 |
return gr.update(choices=[], value=None, interactive=False)
|
47 |
try:
|
48 |
-
splits = get_dataset_split_names(dataset_id, trust_remote_code=True)
|
|
|
49 |
return gr.update(choices=splits, value=splits[0] if splits else None, interactive=True)
|
50 |
except Exception as e:
|
51 |
gr.Warning(f"Could not fetch splits for dataset '{dataset_id}'. Error: {e}")
|
@@ -56,16 +57,12 @@ def get_split_columns(dataset_id: str):
|
|
56 |
if not dataset_id:
|
57 |
return gr.update(choices=[], value=None, interactive=False)
|
58 |
try:
|
59 |
-
# ---
|
60 |
-
|
61 |
-
# This is the official and most reliable way.
|
62 |
-
info = get_dataset_config_info(dataset_id, trust_remote_code=True)
|
63 |
features = info.features
|
64 |
|
65 |
-
# The user is right, we should show ALL columns.
|
66 |
columns = list(features.keys())
|
67 |
|
68 |
-
# We can still be helpful by guessing the best default.
|
69 |
preferred_cols = ['text', 'content', 'instruction', 'question', 'document', 'prompt']
|
70 |
best_col = next((col for col in preferred_cols if col in columns), columns[0] if columns else None)
|
71 |
|
@@ -93,6 +90,8 @@ def generate_atlas(
|
|
93 |
|
94 |
progress(0, desc=f"Loading dataset '{dataset_name}' [{split}]...")
|
95 |
try:
|
|
|
|
|
96 |
dataset = load_dataset(dataset_name, split=split, trust_remote_code=True)
|
97 |
df = dataset.to_pandas()
|
98 |
except Exception as e:
|
@@ -170,10 +169,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Embedding Atlas Explorer") as app:
|
|
170 |
# --- Chained Event Listeners for Dynamic UI ---
|
171 |
hf_user_input.submit(fn=get_user_datasets, inputs=hf_user_input, outputs=dataset_input)
|
172 |
|
173 |
-
# When a dataset is selected, get its splits.
|
174 |
dataset_input.change(fn=get_dataset_splits, inputs=dataset_input, outputs=split_input)
|
175 |
-
|
176 |
-
# When a dataset is selected, ALSO get its columns. The split doesn't matter for column schema.
|
177 |
dataset_input.change(fn=get_split_columns, inputs=dataset_input, outputs=text_column_input)
|
178 |
|
179 |
# --- Button Click Event ---
|
|
|
45 |
if not dataset_id:
|
46 |
return gr.update(choices=[], value=None, interactive=False)
|
47 |
try:
|
48 |
+
# --- FIX: Removed trust_remote_code=True ---
|
49 |
+
splits = get_dataset_split_names(dataset_id)
|
50 |
return gr.update(choices=splits, value=splits[0] if splits else None, interactive=True)
|
51 |
except Exception as e:
|
52 |
gr.Warning(f"Could not fetch splits for dataset '{dataset_id}'. Error: {e}")
|
|
|
57 |
if not dataset_id:
|
58 |
return gr.update(choices=[], value=None, interactive=False)
|
59 |
try:
|
60 |
+
# --- FIX: Removed trust_remote_code=True ---
|
61 |
+
info = get_dataset_config_info(dataset_id)
|
|
|
|
|
62 |
features = info.features
|
63 |
|
|
|
64 |
columns = list(features.keys())
|
65 |
|
|
|
66 |
preferred_cols = ['text', 'content', 'instruction', 'question', 'document', 'prompt']
|
67 |
best_col = next((col for col in preferred_cols if col in columns), columns[0] if columns else None)
|
68 |
|
|
|
90 |
|
91 |
progress(0, desc=f"Loading dataset '{dataset_name}' [{split}]...")
|
92 |
try:
|
93 |
+
# Here, trust_remote_code can be useful if the dataset actually needs it.
|
94 |
+
# It's less likely to crash here than in the metadata functions.
|
95 |
dataset = load_dataset(dataset_name, split=split, trust_remote_code=True)
|
96 |
df = dataset.to_pandas()
|
97 |
except Exception as e:
|
|
|
169 |
# --- Chained Event Listeners for Dynamic UI ---
|
170 |
hf_user_input.submit(fn=get_user_datasets, inputs=hf_user_input, outputs=dataset_input)
|
171 |
|
|
|
172 |
dataset_input.change(fn=get_dataset_splits, inputs=dataset_input, outputs=split_input)
|
|
|
|
|
173 |
dataset_input.change(fn=get_split_columns, inputs=dataset_input, outputs=text_column_input)
|
174 |
|
175 |
# --- Button Click Event ---
|