nazneen committed on
Commit
bac7e94
·
1 Parent(s): 79de90d

interaction

Browse files
Files changed (1) hide show
  1. app.py +29 -14
app.py CHANGED
@@ -19,12 +19,13 @@ def datasets_explorer_viz(df):
19
  s = ColumnDataSource(df)
20
  TOOLTIPS= [("dataset_id", "@dataset_id"), ("task", "@task")]
21
  color = factor_cmap('task', palette=Category20c_20, factors=df['task'].unique())
22
- p = figure(plot_width=1000, plot_height=1000, tools="hover,wheel_zoom,pan,box_select", title="Dataset explorer", tooltips=TOOLTIPS, toolbar_location="above")
23
- p.scatter('x', 'y', size=500, source=s, alpha=0.8,marker='circle',fill_color = color, line_color=color, legend_field = 'task')
24
  p.legend.location = "bottom_right"
25
- #p.legend.click_policy="mute"
26
  p.legend.label_text_font_size="8pt"
27
  table_source = ColumnDataSource(data=dict())
 
28
  columns = [
29
  # TableColumn(field="x", title="X data"),
30
  # TableColumn(field="y", title="Y data"),
@@ -32,7 +33,7 @@ def datasets_explorer_viz(df):
32
  TableColumn(field="dataset_id", title="Dataset ID"),
33
  ]
34
  data_table = DataTable(source=table_source, columns=columns, width=300)
35
-
36
  s.selected.js_on_change('indices', CustomJS(args=dict(umap_source=s, table_source=table_source), code="""
37
  const inds = cb_obj.indices;
38
  const tableData = table_source.data;
@@ -49,30 +50,44 @@ def datasets_explorer_viz(df):
49
  table_source.change.emit();
50
  """
51
  ))
52
- p.selection = ColumnDataSource(data=dict())
53
  #text_input.on_change("value_input",
54
- text_input.value.js_on_change('value', CustomJS(args=dict(plot_source=s, text_input=text_input), code="""
55
- const indices = [];
56
  const plot_data = plot_source.data;
57
- for (var i = 0; i < plot_data['dataset_id'].length(); i++) {
58
- console.log(plot_data['dataset_id'][i]);
59
- if (plot_data['dataset_id'][i] == text_input.value || plot_data['task'][i] == text_input.value) {
60
- indices.push(i)
 
 
 
 
 
 
 
 
 
 
61
  }
62
  }
63
- plot_source.change.emit()
64
  """))
65
- text_input = TextInput(value="", title="Search")
66
  st.bokeh_chart(row(column(text_input,p), data_table))
67
 
68
 
69
  if __name__ == "__main__":
70
  ### STREAMLIT APP CONFIG ###
71
  st.set_page_config(layout="wide", page_title="Datasets Explorer")
72
-
73
  #lcol, rcol = st.columns([2, 2])
74
  # ******* loading the mode and the data
75
 
76
  ### LOAD DATA AND SESSION VARIABLES ###
 
 
 
 
77
  datasets_df = pd.read_parquet('./assets/data/datasets_df.parquet')
 
78
  datasets_explorer_viz(datasets_df)
 
19
  s = ColumnDataSource(df)
20
  TOOLTIPS= [("dataset_id", "@dataset_id"), ("task", "@task")]
21
  color = factor_cmap('task', palette=Category20c_20, factors=df['task'].unique())
22
+ p = figure(plot_width=1000, plot_height=1000, tools="hover,wheel_zoom,pan,box_select", tooltips=TOOLTIPS, toolbar_location="above")
23
+ p.scatter('x', 'y', size=5, source=s, alpha=0.8,marker='circle',fill_color = color, line_color=color, legend_field = 'task')
24
  p.legend.location = "bottom_right"
25
+ p.legend.click_policy="mute"
26
  p.legend.label_text_font_size="8pt"
27
  table_source = ColumnDataSource(data=dict())
28
+ selection_source = ColumnDataSource(data=dict())
29
  columns = [
30
  # TableColumn(field="x", title="X data"),
31
  # TableColumn(field="y", title="Y data"),
 
33
  TableColumn(field="dataset_id", title="Dataset ID"),
34
  ]
35
  data_table = DataTable(source=table_source, columns=columns, width=300)
36
+ p.circle('x', 'y',source=selection_source, size=5, color= 'red')
37
  s.selected.js_on_change('indices', CustomJS(args=dict(umap_source=s, table_source=table_source), code="""
38
  const inds = cb_obj.indices;
39
  const tableData = table_source.data;
 
50
  table_source.change.emit();
51
  """
52
  ))
53
+ text_input = TextInput(value="", title="Search")
54
  #text_input.on_change("value_input",
55
+ text_input.js_on_change('value', CustomJS(args=dict(plot_source=s, selection_source=selection_source), code="""
 
56
  const plot_data = plot_source.data;
57
+ const selectData = selection_source.data
58
+ const value = cb_obj.value
59
+
60
+ selectData['x'] = []
61
+ selectData['y'] = []
62
+ selectData['dataset_id'] = []
63
+ selectData['task'] = []
64
+
65
+ for (var i = 0; i < plot_data['dataset_id'].length; i++) {
66
+ if (plot_data['dataset_id'][i].includes(value) || plot_data['task'][i].includes(value)) {
67
+ selectData['x'].push(plot_data['x'][i])
68
+ selectData['y'].push(plot_data['y'][i])
69
+ selectData['dataset_id'].push(plot_data['dataset_id'][i])
70
+ selectData['task'].push(plot_data['task'][i])
71
  }
72
  }
73
+ selection_source.change.emit()
74
  """))
75
+
76
  st.bokeh_chart(row(column(text_input,p), data_table))
77
 
78
 
79
  if __name__ == "__main__":
80
  ### STREAMLIT APP CONFIG ###
81
  st.set_page_config(layout="wide", page_title="Datasets Explorer")
82
+ st.title('Interactive Datasets Explorer')
83
  #lcol, rcol = st.columns([2, 2])
84
  # ******* loading the mode and the data
85
 
86
  ### LOAD DATA AND SESSION VARIABLES ###
87
+ with st.expander("How to interact with the plot:"):
88
+ st.markdown("* Each point in the plot represents a HF hub dataset categorized by their `task_id`.")
89
+ st.markdown("* Every dataset is emebdded using the [SPECTER](https://github.com/allenai/specter#advanced-training-your-own-model) embedding of its corresponding paper abstract.")
90
+ st.markdown("* You can either search for a dataset or drag and select to peek into the cluster content.")
91
  datasets_df = pd.read_parquet('./assets/data/datasets_df.parquet')
92
+ st.warning("Hugging Face 🤗 Datasets Explorer")
93
  datasets_explorer_viz(datasets_df)