File size: 17,077 Bytes
4d6df18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1862231
4d6df18
1862231
4d6df18
1862231
 
 
4d6df18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3e05a5
4d6df18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3e05a5
4d6df18
 
a3e05a5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
# -*- coding: utf-8 -*-
"""ranking_simulation_0.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1GzjewA8ePLJ1fdk76Qax-aRRsSsNp417
"""

# Commented out IPython magic to ensure Python compatibility.
# %%capture
# !pip install gradio
# !pip install opinionated
#

# Third-party setup: numpy/pandas for the simulation, `opinionated` for the
# matplotlib style and font helper (installed via pip in the notebook above).
import numpy as np
import pandas as pd
import opinionated
import matplotlib.pyplot as plt
plt.style.use("opinionated_rc")  # apply the `opinionated` matplotlib style

# Download and register the Quicksand Google font, then make it the default.
from opinionated.core import download_googlefont
download_googlefont('Quicksand', add_to_cache=True)
plt.rc('font', family='Quicksand')


# NOTE(review): `cmaps` is referenced in plot_top_n_results below, but this
# import is commented out — confirm the `colormaps` package is installed and
# imported, or that function will raise NameError at runtime.
# import colormaps as cmaps

def simulate_applicant_judging(num_applicants, num_judges, ratings_per_applicant, alpha, beta,
                               judge_error=1, judgment_coarse_graining=False):
    """
    Simulates judging of applicants by a set of judges with coarse graining
    functionality and random tie-breaking in ranking.

    :param num_applicants: Number of applicants to be judged.
    :param num_judges: Number of judges available for judging.
    :param ratings_per_applicant: Number of ratings each applicant should receive.
    :param alpha: Alpha parameter for the Beta distribution of applicant quality.
    :param beta: Beta parameter for the Beta distribution of applicant quality.
    :param judge_error: Half-width of the uniform error band around an
        applicant's true quality when a judge scores them.
    :param judgment_coarse_graining: Number of buckets for coarse graining (2 to 100) or False to disable.
    :return: A Pandas DataFrame with detailed results for each applicant.
    """
    # Applicant qualities drawn from Beta(alpha, beta), scaled to 0-100.
    applicant_qualities = np.random.beta(alpha, beta, num_applicants) * 100

    # Snap an evaluation to the nearest of `grain_size` equally spaced buckets.
    def coarse_grain_evaluation(evaluation, grain_size):
        return round(evaluation / (100 / grain_size)) * (100 / grain_size)

    # Initialize evaluations dictionary and per-judge workload counters.
    evaluations = {f"Applicant_{i+1}": [] for i in range(num_applicants)}
    judge_workload = np.zeros(num_judges)

    # Assign judges to applicants, biasing selection towards less-loaded judges.
    for _ in range(ratings_per_applicant):
        for applicant in range(num_applicants):
            probabilities = (max(judge_workload) - judge_workload + 1) / sum(max(judge_workload) - judge_workload + 1)
            judge = np.random.choice(num_judges, p=probabilities)
            judge_workload[judge] += 1
            # The judge sees true quality +/- judge_error (uniform).
            evaluation = np.random.uniform(applicant_qualities[applicant]-judge_error, applicant_qualities[applicant]+judge_error)

            # Apply coarse graining if enabled
            if judgment_coarse_graining:
                evaluation = coarse_grain_evaluation(evaluation, judgment_coarse_graining)

            evaluations[f"Applicant_{applicant+1}"].append(evaluation)

    # Collect per-applicant rows: quality, mean score, raw scores, per-score ranks.
    data = []
    for applicant, (quality, scores) in enumerate(zip(applicant_qualities, evaluations.values()), 1):
        average_evaluation = np.mean(scores)
        original_ranks = np.argsort(np.argsort(-np.array(scores))) + 1
        data.append([f"Applicant_{applicant}", quality, average_evaluation, scores, list(original_ranks)])

    # Create DataFrame
    df = pd.DataFrame(data, columns=["Applicant", "Applicant Quality", "Average Evaluation", "Original Scores", "Rank of Original Scores"])

    # Rank a column descending, breaking ties uniformly at random.
    # BUGFIX: the original did `ranks[ties].sample(frac=1).sort_index()`, which
    # shuffles (index, value) pairs and then restores index order — a no-op —
    # so ties were actually broken deterministically by row position (and, had
    # it worked, it would have mixed ranks across *different* tie groups).
    # Now ranks are permuted only within each group of tied values.
    def random_tie_breaking(frame, column):
        ranks = frame[column].rank(method='first', ascending=False)
        for _, tied_idx in frame.groupby(column).groups.items():
            if len(tied_idx) > 1:
                ranks.loc[tied_idx] = np.random.permutation(ranks.loc[tied_idx].values)
        return ranks

    # Apply random tie-breaking to rankings
    df['Rank of Evaluation'] = random_tie_breaking(df, 'Average Evaluation').astype(int)
    df['Rank of Applicant Quality'] = random_tie_breaking(df, 'Applicant Quality').astype(int)

    return df

# # Example usage with specified alpha and beta values
# df_results = simulate_applicant_judging(num_applicants=100, num_judges=10, ratings_per_applicant=5, alpha=4, beta=4, judgment_coarse_graining=10)
# df_results.head(30)  # Displaying the top 30 rows for brevity




# df_results.sort_values(by='Rank of Evaluation').head(30)

import pandas as pd

def summarize_simulation_runs(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, beta,
                               judge_error=1, judgment_coarse_graining=False):
    """
    Runs the applicant judging simulation multiple times and summarizes how often each candidate by quality was in the top n.

    :param num_runs: Number of times to run the simulation.
    :param num_applicants: Number of applicants to be judged.
    :param num_judges: Number of judges available for judging.
    :param ratings_per_applicant: Number of ratings each applicant should receive.
    :param top_n: Number of top positions to consider in the summary.
    :param alpha: Alpha parameter for the Beta distribution of applicant quality.
    :param beta: Beta parameter for the Beta distribution of applicant quality.
    :param judge_error: Half-width of the judges' uniform error band.
    :param judgment_coarse_graining: Number of buckets for coarse graining or False to disable.
    :return: A Pandas DataFrame indexed by true quality rank (1-based) with one
        'Top i' count column per evaluation position.
    """
    # Initialize counts for each quality-ranked candidate in top n positions
    top_n_counts = pd.DataFrame(0, index=range(1, num_applicants + 1), columns=[f'Top {i}' for i in range(1, top_n + 1)])

    for _ in range(num_runs):
        df_results = simulate_applicant_judging(num_applicants, num_judges, ratings_per_applicant,
                                                alpha=alpha, beta=beta, judge_error=judge_error, judgment_coarse_graining=judgment_coarse_graining)
        # Sort by Rank of Applicant Quality
        sorted_by_quality = df_results.sort_values(by='Applicant Quality', ascending=False).reset_index()
        # Sort by Rank of Evaluation
        sorted_by_evaluation = df_results.sort_values(by='Rank of Evaluation').reset_index()

        # O(1) lookup from applicant name to true quality rank (1-based),
        # replacing the original O(n) boolean-mask scan per top-n slot.
        quality_rank_of = {applicant: pos + 1 for pos, applicant in enumerate(sorted_by_quality['Applicant'])}

        for i in range(top_n):
            # Find which quality-ranked candidate is in this top evaluation position
            quality_rank = quality_rank_of[sorted_by_evaluation.loc[i, 'Applicant']]
            top_n_counts.loc[quality_rank, f'Top {i+1}'] += 1

    return top_n_counts

# Example usage
# num_runs = 1000  # Number of simulation runs
# top_n_results = summarize_simulation_runs(num_runs=num_runs, num_applicants=100, num_judges=5, ratings_per_applicant=3,
#                                           top_n=5,alpha=2, beta=1,judge_error=4, judgment_coarse_graining=False)
# top_n_results



import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap


def plot_top_n_results(top_n_results, num_runs):
    """
    Plots a stacked bar chart of the top-n results, filling out what's missing to num_runs in grey.

    :param top_n_results: DataFrame containing the counts of each quality-ranked candidate in top n positions.
    :param num_runs: Total number of simulation runs.
    :return: The matplotlib Figure containing the chart.
    """
    n_ranks = top_n_results.shape[1]

    # BUGFIX: the original called `cmaps.green_green1_r...` from the
    # third-party `colormaps` package, whose import is commented out at the
    # top of the file, so this function raised NameError at runtime.
    # Matplotlib's built-in reversed Greens map gives an equivalent
    # dark-to-light green ramp; stopping at 0.85 keeps the lightest bar
    # visibly green (mimicking the original `.cut(0.10, 'right')`).
    newcolors = plt.get_cmap('Greens_r')(np.linspace(0, 0.85, n_ranks))

    # Append a grey color for the 'Missing' (not chosen) segment.
    grey = np.array([178/256, 171/256, 165/256, 1])
    newcolors = np.vstack([newcolors, grey])
    # Create a new ListedColormap
    newcmp = ListedColormap(newcolors)

    # Calculate the missing counts to fill up to num_runs
    missing_counts = num_runs - top_n_results.sum(axis=1)

    # Prepare data for stacked bar chart
    data_to_plot = top_n_results.copy()
    data_to_plot['Missing'] = missing_counts

    # Create a figure and axis for plotting
    fig, ax = plt.subplots(figsize=(12, 8))
    # Plot stacked bar chart; only the first n_ranks quality ranks are shown.
    data_to_plot.head(n_ranks).plot(kind='bar', stacked=True, colormap=newcmp, alpha=.9, ax=ax)
    # Plot settings
    ax.set_title('How often did the actually best get chosen?', loc='right')  # Right-align title
    ax.set_xlabel('Real Applicant Rank')
    ax.set_ylabel('Selected in this many simulation runs')

    # Conditionally add legend (only while it stays readable)
    if n_ranks <= 5:
        labels = [label.replace("Top", "Rank") for label in top_n_results.columns] + ['Not chosen']

        ax.legend(labels=labels, title='Rank in Evaluation', loc='lower center', bbox_to_anchor=(0.5, -0.2), ncol=n_ranks+1)  # Legend below the chart
    plt.tight_layout()
    return fig

    # plt.show()
# plot = plot_top_n_results(top_n_results, num_runs)

# num_applicants=100
# num_judges=10
# ratings_per_applicant=5
# top_n=5
# applicant_std_dev=20
# judge_error_std_dev=1
# judgment_coarse_graining=6

# top_n_counts = pd.DataFrame(0, index=range(1, num_applicants + 1), columns=[f'Top {i}' for i in range(1, top_n + 1)])

# df_results = simulate_applicant_judging(num_applicants, num_judges, ratings_per_applicant,
#                                         applicant_std_dev, judge_error_std_dev, judgment_coarse_graining)
# display(df_results.sort_values(by='Rank of Evaluation').head(10))
# # Sort by Rank of Applicant Quality
# sorted_by_quality = df_results.sort_values(by='Applicant Quality', ascending=False).reset_index()
# # Sort by Rank of Evaluation
# sorted_by_evaluation = df_results.sort_values(by='Rank of Evaluation').reset_index()

# for i in range(top_n):
#     # Find which quality-ranked candidate is in this top evaluation position
#     quality_rank = sorted_by_quality[sorted_by_evaluation.loc[i, 'Applicant'] == sorted_by_quality['Applicant']].index[0] + 1
#     top_n_counts.loc[quality_rank, f'Top {i+1}'] += 1

# display(top_n_counts.head(15))

# import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
# from scipy.stats import gaussian_kde

# def visualize_applicant_and_judge_distributions(alpha, beta, judge_error=1):
#     """
#     Visualizes the distribution of applicants' qualities and an example of a judge's evaluations for a random applicant
#     using density estimates with normalized heights.

#     :param applicant_std_dev: Standard deviation for the quality of applicants.
#     :param judge_error_std_dev: Standard deviation for the judge's margin of error.
#     """
#     # Generate applicant qualities
#     applicant_qualities = np.random.beta(alpha, beta, 5000) * 100

#     # Choose a random applicant for judge's evaluation
#     random_applicant_quality = np.random.choice(applicant_qualities)
#     judge_evaluations = np.random.normal(random_applicant_quality, judge_error, 5000)

#     # Calculate KDEs and find peak values
#     kde_applicant = gaussian_kde(applicant_qualities)
#     kde_judge = gaussian_kde(judge_evaluations)
#     x = np.linspace(0, 100, 1000)
#     kde_applicant_vals = kde_applicant(x)
#     kde_judge_vals = kde_judge(x)
#     peak_applicant = np.max(kde_applicant_vals)
#     peak_judge = np.max(kde_judge_vals)
#     scale_factor = peak_applicant / peak_judge

#     # Plotting
#     plt.figure(figsize=(12, 6))

#     # Plot for distribution of all applicants
#     sns.lineplot(x=x, y=kde_applicant_vals, color="blue", label='Applicant Qualities')
#     plt.fill_between(x, kde_applicant_vals, color="blue", alpha=0.3)
#     plt.title('Distribution of Applicant Qualities')
#     plt.xlabel('Quality')
#     plt.ylabel('Normalized Density')
#     plt.legend()
#     plt.xlim(0, 100)

#     # # Plot for distribution of a single applicant's evaluations
#     # sns.lineplot(x=x, y=kde_judge_vals * scale_factor, color='orange', label='Judge Evaluations')
#     # plt.fill_between(x, kde_judge_vals * scale_factor, color="orange", alpha=0.3)
#     # plt.title('Distribution of a Judge\'s Evaluations for a Chosen Applicant')
#     # plt.xlabel('Evaluation Score')
#     # plt.ylabel('Normalized Density')
#     # plt.legend()
#     # plt.xlim(0, 100)

#     plt.tight_layout()
#     plt.show()

# # Example usage
# visualize_applicant_and_judge_distributions(alpha=2, beta=1, judge_error=5)

import gradio as gr
import matplotlib.pyplot as plt
from io import BytesIO

from scipy.stats import beta

# Preview plot: the Beta(a, b) applicant-quality density plus a bar showing
# the judges' variability band.
def plot_beta_distribution(a, b, judgement_variability):
    """Plot the applicants' Beta(a, b) quality density on a 0-100 axis.

    A horizontal black bar centred at x=50, of width 2*judgement_variability,
    visualizes how much a judge's score can deviate from true quality.
    """
    support = np.linspace(0, 1, 1000)
    density = beta.pdf(support, a, b)

    fig, ax = plt.subplots(figsize=(7, 3))  # Figure size
    # Density is computed on [0, 1] but drawn against a 0-100 axis.
    plt.fill_between(np.linspace(0, 100, 1000), density, color="#ee4d5a", alpha=0.8)
    plt.xlabel('True Applicants Quality-Distribution')
    plt.xlim(0, 100)
    ax.set_yticklabels([])  # Remove y-axis labels

    # Variability bar: spans judgement_variability to either side of x=50.
    half_width = judgement_variability
    bar_x = [50 - half_width, 50 + half_width]
    plt.plot(bar_x, [0, 0], color='black', linewidth=2)

    # Label centred above the bar.
    plt.text((bar_x[0] + bar_x[1]) / 2, 0.02, 'Judgement Variability', ha='center', va='bottom', color='black')

    return fig

# Your existing function for running simulation and plotting
# Gradio callback: run the Monte-Carlo simulation and return the summary figure.
def run_simulation_and_plot(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, beta, judge_error, judgment_coarse_graining,judgment_coarse_graining_true_false):
    """
    Run `summarize_simulation_runs` with the UI settings and plot the result.

    :param judgment_coarse_graining_true_false: Checkbox value; when unchecked,
        the coarse-graining slider value is ignored and coarse graining is off.
    :return: matplotlib Figure from plot_top_n_results.
    """
    # Idiomatic truthiness test instead of `== False`.
    if not judgment_coarse_graining_true_false:
      judgment_coarse_graining = False
    top_n_results = summarize_simulation_runs(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, beta, judge_error, judgment_coarse_graining)
    return plot_top_n_results(top_n_results, num_runs)



# Static HTML shown at the top of the Gradio app.
intro_html = """
    <h1>On Rankings</h1>
    <p>One of the central experiences of being an academic is the experience of being ranked. We are ranked when we apply for graduate school, or maybe already for a master's degree. We are ranked when we're up for faculty positions. We are ranked when we submit abstracts for conferences. And when we publish papers, we do so, of course, in journals that are ranked. The places where we work, the departments, are ranked, of course, as well. But although rankings apparently are catnip to academics, and probably everybody else as well, we do have some agreement. Most people probably share the intuition that there's something weird or iffy about rankings, and the suspicion that maybe often they are not as informative as there are some beings absolutely everywhere who suggest.</p>
"""
# Caption under the distribution preview plot.
# BUGFIX: corrected user-facing typo "disrtribution" -> "distribution".
comment_distribution_image = """<p>This is the distribution from which our applicants will be sampled:</p>"""

# Build the Gradio interface: simulation controls on the left column,
# the distribution preview and results plot on the right.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
  with gr.Column():
    gr.HTML(intro_html)
    with gr.Row():
      with gr.Column():
        run_button = gr.Button("Run Simulations!")

        # Controls for the applicant-quality Beta distribution.
        with gr.Group():
          alpha_slider = gr.Slider(0.1, 5, step=0.1, value=1.4, label="Alpha (β Distribution)")
          beta_slider = gr.Slider(0.1, 5, step=0.1,value=2.7, label="Beta (β Distribution)")
        # Simulation settings:
        num_applicants = gr.Slider(10, 300, step=10, value=100, label="Number of Applicants")
        num_judges = gr.Slider(1, 100, step=1, value=7, label="Number of Judges")
        ratings_per_applicant = gr.Slider(1, 5, step=1, value=3, label="Ratings per Applicant", info='how many different ratings each application gets.')
        top_n = gr.Slider(1, 40, step=1, value=5, label="Top N")

        judge_error = gr.Slider(0, 10, step=1, value=2, label="Judge Error")
        judgment_coarse_graining_true_false = gr.Checkbox(value= True, label="Coarse grain judgements.")
        judgment_coarse_graining = gr.Slider(0, 30, step=1, value=7, label="Coarse Graining Factor")
        num_runs = gr.Slider(10, 1000, step=10,value=100, label="Number of Runs")


      with gr.Column():
        with gr.Group():
          beta_plot = gr.Plot(label="Applicants quality distribution")
          gr.HTML(comment_distribution_image)

        # Output area for the stacked-bar simulation summary.
        plot_output = gr.Plot(label="Simulation Results",show_label=True)
# Background-color styling reference:
# https://discuss.huggingface.co/t/gradio-changing-background-colour-in-all-devices/42519

    # Run the full Monte-Carlo simulation when the button is clicked.
    run_button.click(
        run_simulation_and_plot,
        inputs=[num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha_slider, beta_slider, judge_error, judgment_coarse_graining,judgment_coarse_graining_true_false],
        outputs=[plot_output]
    )

    # Live-update the quality-distribution preview whenever a relevant control changes.
    alpha_slider.change(plot_beta_distribution, inputs=[alpha_slider, beta_slider,judge_error], outputs=[beta_plot])
    beta_slider.change(plot_beta_distribution, inputs=[alpha_slider, beta_slider,judge_error], outputs=[beta_plot])
    judge_error.change(plot_beta_distribution, inputs=[alpha_slider, beta_slider,judge_error], outputs=[beta_plot])

    # Render the initial distribution preview when the app loads.
    demo.load(plot_beta_distribution, inputs=[alpha_slider, beta_slider,judge_error], outputs=[beta_plot])

if __name__ == "__main__":
    demo.launch(debug=True)