File size: 2,554 Bytes
976ee8c
 
 
f2d5c0f
 
976ee8c
 
 
 
f2d5c0f
 
61f9bac
f2d5c0f
61f9bac
f2d5c0f
976ee8c
 
 
61f9bac
f2d5c0f
976ee8c
f2d5c0f
 
976ee8c
f2d5c0f
976ee8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2d5c0f
 
976ee8c
 
 
 
 
 
 
 
 
 
 
 
 
f2d5c0f
976ee8c
 
f2d5c0f
976ee8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24142d6
 
976ee8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61f9bac
f2d5c0f
 
976ee8c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import random
import glob
import json

import numpy as np
from flask import Flask, render_template, request

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)


# Problem descriptions: a JSON list whose entries are read for their
# "question_title" and "difficulty" keys in this module.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open("problems.json", encoding="utf-8") as f:
    problems = json.load(f)
problem_choices = [q["question_title"] for q in problems]

# Display order of the problems: a shuffled permutation of the indices into
# ``problems``. Position i in every derived list refers to
# ``problems[random_idxs[i]]``.
random_idxs = list(range(len(problems)))
random.shuffle(random_idxs)

# Model outputs: a JSON object mapping model name -> per-problem records,
# indexed by problem index. Each record is read for its "code_list" and
# "pass1_list" keys in this module.
with open("all_outputs.json", encoding="utf-8") as f:
    all_outputs = json.load(f)
all_models = list(all_outputs.keys())


num_questions_filtered = len(problems)

# problem index -> {model name -> mean of that model's "pass1_list"}.
all_correctness_by_problem = {
    idx: {model: np.mean(all_outputs[model][idx]["pass1_list"]) for model in all_models}
    for idx in random_idxs
}


def calculate_color(performance):
    """Return the CSS ``rgba(...)`` colour string for a score.

    The score is bucketed by strict ``>`` comparisons against 0.75, 0.5 and
    0.25:

    * above 0.75: fixed green at alpha 0.5
    * above 0.5:  yellowish green, alpha equal to the score
    * above 0.25: reddish, alpha equal to ``1 - score``
    * otherwise (including NaN, which fails every comparison): fixed red at
      alpha 0.5
    """
    if performance > 0.75:
        return "rgba(0, 150, 0, 0.5)"

    if performance > 0.5:
        alpha = performance
        return f"rgba(50, 150, 0, {alpha})"

    if performance > 0.25:
        alpha = 1 - performance
        return f"rgba(150, 50, 0, {alpha})"

    return "rgba(150, 0, 0,  0.5)"


def _score_cell(score):
    """Build the display entry (percentage text + colour) for one score."""
    return {
        "correctness": f"{score*100:.1f}",
        "correctness_color": calculate_color(score),
    }


def _evaluation_row(position, source_idx):
    """Build one ``(position, per-model cells, difficulty)`` tuple."""
    scores = all_correctness_by_problem[source_idx]
    cells = {name: _score_cell(scores[name]) for name in all_models}
    return position, cells, problems[source_idx]["difficulty"]


def _samples_for_view(record):
    """Pair each entry of "code_list" with the matching "pass1_list" entry."""
    return [
        {"code": code, "pass1": passed}
        for code, passed in zip(record["code_list"], record["pass1_list"])
    ]


# One tuple per problem, in shuffled display order. The first element is the
# position in that order, not the index into ``problems``.
all_evaluations_by_problem_colored = [
    _evaluation_row(position, source_idx)
    for position, source_idx in enumerate(random_idxs)
]

# model name -> list (in shuffled display order) of per-problem sample lists.
all_data_for_view_formatted = {
    model: [_samples_for_view(resp[source_idx]) for source_idx in random_idxs]
    for model, resp in all_outputs.items()
}


@app.route("/")
def home():
    """Render "index.html" with the model names and the per-problem rows."""
    print(all_models)
    context = {
        "models": all_models,
        "problems": all_evaluations_by_problem_colored,
    }
    return render_template("index.html", **context)


@app.route("/problem/<int:problem_idx>")
def problem(problem_idx):
    """Render the detail page for one problem.

    Args:
        problem_idx: Position of the problem in the shuffled display order,
            i.e. the first element of the matching
            ``all_evaluations_by_problem_colored`` tuple. It is NOT an index
            into ``problems``.

    Returns:
        The rendered "problem.html" page, or a plain-text 404 response when
        ``problem_idx`` is outside the range of known problems.
    """
    # Reject unknown positions explicitly instead of letting the list lookups
    # below raise IndexError.
    if not 0 <= problem_idx < len(random_idxs):
        return f"Problem {problem_idx} not found", 404

    data = {
        model: all_data_for_view_formatted[model][problem_idx] for model in all_models
    }
    evaluation = all_evaluations_by_problem_colored[problem_idx][1]
    # ``data`` and ``evaluation`` are stored in shuffled order, so the position
    # must be mapped back to the original index to fetch the matching question.
    question = problems[random_idxs[problem_idx]]

    return render_template(
        "problem.html",
        problem_idx=problem_idx,
        evaluation=evaluation,
        models=all_models,
        question=question,
        data=data,
    )


# Run the Flask app with its default settings when this file is executed
# directly rather than imported.
if __name__ == "__main__":
    app.run()