Corey Morris
committed on
Commit
·
627e0f9
1
Parent(s):
2db58a0
Finding top differences between tasks from the target model
Browse files
app.py
CHANGED
|
@@ -271,11 +271,38 @@ fig_line = create_line_chart(filtered_data, closest_models, metrics_to_compare)
|
|
| 271 |
st.plotly_chart(fig_radar)
|
| 272 |
st.plotly_chart(fig_line)
|
| 273 |
|
| 274 |
-
# show MMLU_average at the beginning of the dataframe
|
| 275 |
|
| 276 |
st.dataframe(filtered_data.loc[closest_models, metrics_to_compare])
|
| 277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
|
|
|
|
|
|
| 279 |
|
| 280 |
|
| 281 |
# end of custom scatter plots
|
|
|
|
| 271 |
st.plotly_chart(fig_radar)
|
| 272 |
st.plotly_chart(fig_line)
|
| 273 |
|
|
|
|
| 274 |
|
| 275 |
st.dataframe(filtered_data.loc[closest_models, metrics_to_compare])
|
| 276 |
|
| 277 |
+
# Find the largest per-task score differences from the target model; returns a display DataFrame and the list of task names
|
| 278 |
+
def find_top_differences_table(df, target_model, closest_models, num_differences=10, exclude_columns=None):
    """Find the tasks where the closest models differ most from the target model.

    Args:
        df: DataFrame indexed by model name with one column per task/metric.
        target_model: Index label of the model every other model is compared to.
        closest_models: Index labels of the models to compare against the target.
        num_differences: Number of largest (task, model) differences to keep.
        exclude_columns: Columns to leave out of the comparison. Defaults to
            ``['Parameters']``. Columns that are not present in ``df`` are ignored.

    Returns:
        A tuple ``(table, tasks)``. ``table`` is a DataFrame with columns
        ``'Task'`` and ``'Difference'``, one row per retained (task, model)
        pair, largest difference first. ``tasks`` lists the task names from
        ``table`` in order of first appearance with duplicates removed, so it
        can be used directly as a set of chart categories.
    """
    # None stands in for the default so no list object is shared between calls.
    excluded = ['Parameters'] if exclude_columns is None else list(exclude_columns)

    # Drop the excluded columns from BOTH operands; dropping them only from the
    # comparison rows lets the subtraction re-align them back in as all-NaN columns.
    comparison_scores = df.loc[closest_models].drop(columns=excluded, errors='ignore')
    target_scores = df.loc[target_model].drop(labels=excluded, errors='ignore')
    differences = comparison_scores.sub(target_scores).abs()

    # Unstacking yields a Series keyed by (task column, model row).
    top_differences = differences.unstack().nlargest(num_differences)

    top_differences_table = pd.DataFrame({
        'Task': [task for task, _model in top_differences.index],
        'Difference': top_differences.values,
    })

    # A task can appear once per compared model; keep only its first occurrence
    # so downstream charts do not receive repeated categories.
    unique_tasks = list(dict.fromkeys(top_differences_table['Task']))
    return top_differences_table, unique_tasks
|
| 289 |
+
|
| 290 |
+
# Target model to compare against; closest_models is reused from the comparison above
|
| 291 |
+
selected_model_name = "firefly-ziya-13b"
|
| 292 |
+
# closest_models = filtered_data['MMLU_average'].sub(filtered_data.loc[selected_model_name, 'MMLU_average']).abs().nsmallest(5).index.tolist()
|
| 293 |
+
|
| 294 |
+
# Find the top 10 tasks with the largest differences and convert to a DataFrame
|
| 295 |
+
top_differences_table, top_differences_tasks = find_top_differences_table(filtered_data, selected_model_name, closest_models)
|
| 296 |
+
|
| 297 |
+
# Display the table in the Streamlit app
|
| 298 |
+
st.markdown("## Top Differences")
|
| 299 |
+
st.dataframe(top_differences_table)
|
| 300 |
+
|
| 301 |
+
# Create a radar chart for the tasks with the largest differences
|
| 302 |
+
fig_radar_top_differences = create_radar_chart_unfilled(filtered_data, closest_models, top_differences_tasks)
|
| 303 |
|
| 304 |
+
# Display the radar chart
|
| 305 |
+
st.plotly_chart(fig_radar_top_differences)
|
| 306 |
|
| 307 |
|
| 308 |
# end of custom scatter plots
|