Spaces:

livecodebench
/

code_generation_samples

Running

File size: 2,554 Bytes

976ee8c
 
 
f2d5c0f
 
976ee8c
 
 
 
f2d5c0f
 
61f9bac
f2d5c0f
61f9bac
f2d5c0f
976ee8c
 
 
61f9bac
f2d5c0f
976ee8c
f2d5c0f
 
976ee8c
f2d5c0f
976ee8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2d5c0f
 
976ee8c
 
 
 
 
 
 
 
 
 
 
 
 
f2d5c0f
976ee8c
 
f2d5c0f
976ee8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24142d6
 
976ee8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61f9bac
f2d5c0f
 
976ee8c

import glob
import json
import os
import random

import numpy as np
from flask import Flask, abort, render_template, request

app = Flask(__name__)


# Problem metadata: a sequence of dicts; "question_title" is read from each
# entry here. The encoding is pinned to UTF-8 so decoding does not depend on
# the host's locale.
with open("problems.json", encoding="utf-8") as f:
    problems = json.load(f)

problem_choices = [q["question_title"] for q in problems]

# Display order: a shuffled permutation of the indices into ``problems``.
# It is reshuffled every time the module is imported.
random_idxs = list(range(len(problems)))
random.shuffle(random_idxs)

# Model outputs keyed by model name; each value is indexed by problem index
# and each entry carries a "pass1_list".
with open("all_outputs.json", encoding="utf-8") as f:
    all_outputs = json.load(f)

all_models = list(all_outputs.keys())


num_questions_filtered = len(problems)

# problem index -> {model name -> mean of that model's "pass1_list"}.
# Keys are indices into ``problems``, not positions in ``random_idxs``.
all_correctness_by_problem = {
    idx: {model: np.mean(all_outputs[model][idx]["pass1_list"]) for model in all_models}
    for idx in random_idxs
}


def calculate_color(performance):
    """Return the CSS ``rgba(...)`` colour string for a score.

    The score is bucketed by strict ``>`` comparisons against 0.75, 0.5 and
    0.25:

    * above 0.75 -> fixed green at alpha 0.5
    * above 0.5  -> green-leaning tone, alpha equal to ``performance``
    * above 0.25 -> red-leaning tone, alpha equal to ``1 - performance``
    * otherwise  -> fixed red at alpha 0.5

    Callers in this file pass the mean of a ``pass1_list``; the value is
    presumably a fraction in [0, 1], but no clamping is done here.
    """
    if performance > 0.75:
        return "rgba(0, 150, 0, 0.5)"
    if performance > 0.5:
        return f"rgba(50, 150, 0, {performance})"
    if performance > 0.25:
        alpha = 1 - performance
        return f"rgba(150, 50, 0, {alpha})"
    # Everything at or below 0.25 (and any value that fails every
    # comparison, such as NaN) ends up here.
    return "rgba(150, 0, 0,  0.5)"


def _score_cells(source_idx):
    """Build ``{model: {"correctness", "correctness_color"}}`` for one problem.

    ``source_idx`` is a key of ``all_correctness_by_problem``.
    """
    cells = {}
    for model in all_models:
        rate = all_correctness_by_problem[source_idx][model]
        cells[model] = {
            # Percentage rendered with one decimal place, as a string.
            "correctness": f"{rate*100:.1f}",
            "correctness_color": calculate_color(rate),
        }
    return cells


def _samples(row):
    """Pair each entry of ``row["code_list"]`` with its ``row["pass1_list"]`` flag."""
    return [
        {"code": code, "pass1": passed}
        for code, passed in zip(row["code_list"], row["pass1_list"])
    ]


# One tuple per problem, in shuffled display order:
# (display position, per-model score cells, problem difficulty).
all_evaluations_by_problem_colored = [
    (position, _score_cells(source_idx), problems[source_idx]["difficulty"])
    for position, source_idx in enumerate(random_idxs)
]

# model name -> list (in shuffled display order) of per-problem sample lists.
all_data_for_view_formatted = {
    model: [_samples(rows[source_idx]) for source_idx in random_idxs]
    for model, rows in all_outputs.items()
}


@app.route("/")
def home():
    """Render ``index.html`` with the model list and the per-problem score rows."""
    # Echoes the model names to stdout on every request.
    print(all_models)
    template_context = {
        "models": all_models,
        "problems": all_evaluations_by_problem_colored,
    }
    return render_template("index.html", **template_context)


@app.route("/problem/<int:problem_idx>")
def problem(problem_idx):
    """Render ``problem.html`` for one problem.

    ``problem_idx`` is a position in the shuffled display order, i.e. an
    index into ``random_idxs``, ``all_evaluations_by_problem_colored`` and
    the per-model lists in ``all_data_for_view_formatted``. It is not an
    index into ``problems``.

    Responds with 404 when ``problem_idx`` does not refer to a problem.
    """
    # Reject unknown positions up front rather than failing with IndexError
    # (which Flask would report as a 500).
    if not 0 <= problem_idx < len(random_idxs):
        abort(404)

    data = {
        model: all_data_for_view_formatted[model][problem_idx] for model in all_models
    }
    evaluation = all_evaluations_by_problem_colored[problem_idx][1]
    # Translate the display position back to the original problem index so
    # the question text belongs to the same problem as ``data`` and
    # ``evaluation``, which were both built in shuffled order.
    question = problems[random_idxs[problem_idx]]

    return render_template(
        "problem.html",
        problem_idx=problem_idx,
        evaluation=evaluation,
        models=all_models,
        question=question,
        data=data,
    )


# Start the Flask server only when this file is executed as a script (not
# when it is imported); ``app.run()`` is called with no arguments, so Flask's
# defaults apply.
if __name__ == "__main__":
    app.run()