vkt1414 committed on
Commit
1a87ae9
·
1 Parent(s): e3c19c0

allow user to pick any of the 28 radiomics features

Browse files
Files changed (1) hide show
  1. filter_data_app.py +64 -20
filter_data_app.py CHANGED
@@ -14,7 +14,7 @@ st.set_page_config(layout="wide")
14
  LOCAL_PARQUET_FILE = 'qual-checks-and-quant-values.parquet'
15
 
16
  @st.cache_data
17
- def load_data():
18
  cols = [
19
  'PatientID',
20
  'StudyInstanceUID',
@@ -25,7 +25,7 @@ def load_data():
25
  'laterality_check',
26
  'series_with_vertabra_on_every_slice',
27
  'connected_volumes',
28
- 'Volume from Voxel Summation'
29
  ]
30
  df = pl.read_parquet(LOCAL_PARQUET_FILE, columns=cols)
31
  df = df.with_columns([
@@ -39,11 +39,15 @@ def load_data():
39
  # Function to filter data based on user input
40
  def filter_data(df, filters):
41
  for col, value in filters.items():
42
- if value is not None:
43
  if col == 'connected_volumes' and value:
44
  df = df.filter((pl.col(col) <= value) & (pl.col(col).is_not_null()))
45
  else:
46
  df = df.filter(pl.col(col) == value)
 
 
 
 
47
  return df
48
 
49
  # Function to create an UpSet plot for failed checks
@@ -75,10 +79,10 @@ def create_upset_plot_passes(df):
75
  st.pyplot(fig)
76
 
77
  # Function to calculate standard deviation of volumes within a patient
78
- def calculate_std_dev(df):
79
  df=df.to_pandas()
80
  # Group by 'PatientID' and calculate the standard deviation of 'Volume from Voxel Summation'
81
- std_dev_df = df.groupby(['PatientID','bodyPart'])['Volume from Voxel Summation'].std()
82
  return std_dev_df
83
 
84
  # Main function to run the Streamlit app
@@ -90,7 +94,7 @@ def main():
90
  page = st.sidebar.selectbox("Choose a page", ["Summary", "UpSet Plots"])
91
 
92
  # Load the data
93
- df = load_data()
94
 
95
  if page == "UpSet Plots":
96
  with st.sidebar:
@@ -104,7 +108,8 @@ def main():
104
  'laterality_check': None,
105
  'series_with_vertabra_on_every_slice': None,
106
  'connected_volumes': None,
107
- 'laterality': None
 
108
  }
109
 
110
  filters = st.session_state.filters
@@ -116,7 +121,8 @@ def main():
116
  'laterality_check': None,
117
  'series_with_vertabra_on_every_slice': None,
118
  'connected_volumes': None,
119
- 'laterality': None
 
120
  })
121
  st.session_state.filters = filters
122
 
@@ -124,6 +130,49 @@ def main():
124
  filters[filter_name] = value
125
  st.session_state.filters = filters
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  # Body part filter
128
  body_part_options = sorted(df['bodyPart'].unique().to_list())
129
  body_part = st.selectbox("Body Part", options=body_part_options, key='bodyPart', on_change=reset_filters)
@@ -173,13 +222,6 @@ def main():
173
  on_change=lambda: apply_filter('series_with_vertabra_on_every_slice', st.session_state.series_with_vertabra_on_every_slice)
174
  )
175
 
176
- # connected_volumes = st.selectbox(
177
- # "Connected Volumes (<= value)",
178
- # options=connected_volumes_options,
179
- # index=connected_volumes_options.index(filters['connected_volumes']) if filters['connected_volumes'] else 0,
180
- # key='connected_volumes',
181
- # on_change=lambda: apply_filter('connected_volumes', st.session_state.connected_volumes)
182
- # )
183
  connected_volumes = st.selectbox(
184
  "Connected Volumes (<= value)",
185
  options=[None] + connected_volumes_options,
@@ -258,14 +300,14 @@ def main():
258
  import pandas as pd
259
 
260
  # Assuming calculate_std_dev returns a Series
261
- std_dev_before_filtering = calculate_std_dev(body_part_df)
262
- std_dev_after_filtering = calculate_std_dev(filtered_df)
263
 
264
  # Convert Series to DataFrame and add 'Filtering' column
265
- std_dev_before_filtering = std_dev_before_filtering.reset_index().rename(columns={0: 'Volume from Voxel Summation'})
266
  std_dev_before_filtering['Filtering'] = 'Before Filtering'
267
 
268
- std_dev_after_filtering = std_dev_after_filtering.reset_index().rename(columns={0: 'Volume from Voxel Summation'})
269
  std_dev_after_filtering['Filtering'] = 'After Filtering'
270
 
271
  # Combine the dataframes for easier plotting
@@ -278,9 +320,11 @@ def main():
278
  st.header("Violin Plots for Standard Deviation of Volumes")
279
  st.write("This plot shows the distribution of standard deviation of volumes within a patient.")
280
  fig2, ax = plt.subplots()
281
- sns.violinplot(x='Filtering', y='Volume from Voxel Summation', data=combined_df, ax=ax)
282
  ax.set_ylabel("Standard Deviation of Volumes")
283
  st.pyplot(fig2)
 
 
284
 
285
 
286
  # Define the pages
 
14
  LOCAL_PARQUET_FILE = 'qual-checks-and-quant-values.parquet'
15
 
16
  @st.cache_data
17
+ def load_data(radiomics_feature='Volume from Voxel Summation'):
18
  cols = [
19
  'PatientID',
20
  'StudyInstanceUID',
 
25
  'laterality_check',
26
  'series_with_vertabra_on_every_slice',
27
  'connected_volumes',
28
+ radiomics_feature # Include the selected radiomics feature column
29
  ]
30
  df = pl.read_parquet(LOCAL_PARQUET_FILE, columns=cols)
31
  df = df.with_columns([
 
39
  # Function to filter data based on user input
40
  def filter_data(df, filters):
41
  for col, value in filters.items():
42
+ if value is not None and col != 'radiomics_feature': # Exclude radiomics_feature from filtering
43
  if col == 'connected_volumes' and value:
44
  df = df.filter((pl.col(col) <= value) & (pl.col(col).is_not_null()))
45
  else:
46
  df = df.filter(pl.col(col) == value)
47
+ # Filter based on radiomics feature
48
+ radiomics_feature = filters.get('radiomics_feature')
49
+ if radiomics_feature:
50
+ df = df.filter(pl.col(radiomics_feature) is not None) # Filter where the radiomics feature is not None
51
  return df
52
 
53
  # Function to create an UpSet plot for failed checks
 
79
  st.pyplot(fig)
80
 
81
  # Function to calculate standard deviation of volumes within a patient
82
def calculate_std_dev(df, radiomics_feature='Volume from Voxel Summation'):
    """Return the per-patient standard deviation of one radiomics feature.

    Args:
        df: Object exposing ``to_pandas()`` (e.g. a Polars DataFrame) whose
            pandas form has 'PatientID', 'bodyPart' and the feature column.
        radiomics_feature: Name of the column to aggregate. Defaults to
            'Volume from Voxel Summation', the same default ``load_data``
            uses, so callers written against the older one-argument form
            keep working.

    Returns:
        A pandas Series indexed by ('PatientID', 'bodyPart') and named after
        ``radiomics_feature``, holding the standard deviation of each group.
    """
    pandas_df = df.to_pandas()
    # Group by patient and body part, then take the std of the chosen feature.
    std_dev_df = pandas_df.groupby(['PatientID', 'bodyPart'])[radiomics_feature].std()
    return std_dev_df
87
 
88
  # Main function to run the Streamlit app
 
94
  page = st.sidebar.selectbox("Choose a page", ["Summary", "UpSet Plots"])
95
 
96
  # Load the data
97
+ #df = load_data()
98
 
99
  if page == "UpSet Plots":
100
  with st.sidebar:
 
108
  'laterality_check': None,
109
  'series_with_vertabra_on_every_slice': None,
110
  'connected_volumes': None,
111
+ 'laterality': None,
112
+ 'radiomics_feature': 'Volume from Voxel Summation' # Default radiomics feature
113
  }
114
 
115
  filters = st.session_state.filters
 
121
  'laterality_check': None,
122
  'series_with_vertabra_on_every_slice': None,
123
  'connected_volumes': None,
124
+ 'laterality': None,
125
+ 'radiomics_feature': 'Volume from Voxel Summation'
126
  })
127
  st.session_state.filters = filters
128
 
 
130
  filters[filter_name] = value
131
  st.session_state.filters = filters
132
 
133
+
134
+ # Radiomics feature selection
135
+ radiomics_feature_options = [
136
+ 'Volume from Voxel Summation', # Default option
137
+ '10th percentile',
138
+ '90th percentile',
139
+ 'Elongation',
140
+ 'Energy',
141
+ 'Flatness',
142
+ 'Intensity Histogram Entropy',
143
+ 'Intensity histogram uniformity',
144
+ 'Interquartile range',
145
+ 'Kurtosis',
146
+ 'Least Axis in 3D Length',
147
+ 'Major Axis in 3D Length',
148
+ 'Maximum 3D Diameter of a Mesh',
149
+ 'Maximum grey level',
150
+ 'Mean',
151
+ 'Mean absolute deviation',
152
+ 'Median',
153
+ 'Minimum grey level',
154
+ 'Minor Axis in 3D Length',
155
+ 'Range',
156
+ 'Robust mean absolute deviation',
157
+ 'Root mean square',
158
+ 'Skewness',
159
+ 'Sphericity',
160
+ 'Surface Area of Mesh',
161
+ 'Surface to Volume Ratio',
162
+ 'Variance',
163
+ 'Volume from Voxel Summation',
164
+ 'Volume of Mesh'
165
+ ]
166
+ radiomics_feature = st.selectbox(
167
+ "Radiomics Feature",
168
+ options=radiomics_feature_options,
169
+ index=radiomics_feature_options.index(filters['radiomics_feature']) if filters['radiomics_feature'] else 0,
170
+ key='radiomics_feature',
171
+ on_change=lambda: apply_filter('radiomics_feature', st.session_state.radiomics_feature)
172
+ )
173
+ df = load_data(radiomics_feature=radiomics_feature)
174
+ filters['radiomics_feature'] = radiomics_feature
175
+
176
  # Body part filter
177
  body_part_options = sorted(df['bodyPart'].unique().to_list())
178
  body_part = st.selectbox("Body Part", options=body_part_options, key='bodyPart', on_change=reset_filters)
 
222
  on_change=lambda: apply_filter('series_with_vertabra_on_every_slice', st.session_state.series_with_vertabra_on_every_slice)
223
  )
224
 
 
 
 
 
 
 
 
225
  connected_volumes = st.selectbox(
226
  "Connected Volumes (<= value)",
227
  options=[None] + connected_volumes_options,
 
300
  import pandas as pd
301
 
302
  # Assuming calculate_std_dev returns a Series
303
+ std_dev_before_filtering = calculate_std_dev(body_part_df, radiomics_feature)
304
+ std_dev_after_filtering = calculate_std_dev(filtered_df, radiomics_feature)
305
 
306
  # Convert Series to DataFrame and add 'Filtering' column
307
+ std_dev_before_filtering = std_dev_before_filtering.reset_index().rename(columns={0: radiomics_feature})
308
  std_dev_before_filtering['Filtering'] = 'Before Filtering'
309
 
310
+ std_dev_after_filtering = std_dev_after_filtering.reset_index().rename(columns={0: radiomics_feature})
311
  std_dev_after_filtering['Filtering'] = 'After Filtering'
312
 
313
  # Combine the dataframes for easier plotting
 
320
  st.header("Violin Plots for Standard Deviation of Volumes")
321
  st.write("This plot shows the distribution of standard deviation of volumes within a patient.")
322
  fig2, ax = plt.subplots()
323
+ sns.violinplot(x='Filtering', y=radiomics_feature, data=combined_df, ax=ax)
324
  ax.set_ylabel("Standard Deviation of Volumes")
325
  st.pyplot(fig2)
326
+ body_part_df
327
+ filtered_df
328
 
329
 
330
  # Define the pages