import json
import re

import gradio as gr
import numpy
import pandas as pd

from src.display.css_html_js import custom_css
from src.about import (
    INTRODUCTION_TEXT,
    TITLE,
    AUTHORS,
)
from src.display.formatting import make_clickable_model
from plot_results import create_performance_plot

demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    NUMBER_OF_QUESTIONS = 171.0

    # Load the benchmark results. The CSV is read manually instead of with
    # pd.read_csv because splitting each row at most 13 times keeps any commas
    # in the final (error-message) field from breaking the column layout.
    # leaderboard_df = pd.read_csv("benchmark_results.csv")
    leaderboard_df = []
    with open("benchmark_results.csv", "r") as f:
        header = f.readline().strip().split(",")
        header = [h.strip() for h in header]
        for line in f:
            leaderboard_df.append(line.strip().split(",", 13))

    # Also index the metadata by the part of each key before the first comma,
    # so lookups by bare model path succeed.
    with open("metadata.json") as f:
        metadata = json.load(f)
    for k, v in list(metadata.items()):
        metadata[k.split(",")[0]] = v

    # Create the dataframe from the parsed rows and header.
    leaderboard_df = pd.DataFrame(leaderboard_df, columns=header)

    # Keep only the Polish EQ-Bench runs (v1 and v2). The parentheses around
    # each comparison avoid pandas' ambiguous truth-value ValueError.
    leaderboard_df = leaderboard_df[
        (leaderboard_df["Benchmark Version"] == "eq-bench_v2_pl")
        | (leaderboard_df["Benchmark Version"] == "eq-bench_pl")
    ]

    # Keep only the columns used by the leaderboard.
    leaderboard_df = leaderboard_df[
        ["Model Path", "Benchmark Score", "Num Questions Parseable", "Error"]
    ]

    def parse_parseable(row):
        """For failed runs, recover the parseable-question count from the error message."""
        if row["Num Questions Parseable"] == "FAILED":
            m = re.match(r"(\d+)\.0 questions were parseable", row["Error"])
            return m.group(1)
        return row["Num Questions Parseable"]

    leaderboard_df["Num Questions Parseable"] = leaderboard_df[
        ["Num Questions Parseable", "Error"]
    ].apply(parse_parseable, axis=1)

    def fraction_to_percentage(numerator: float, denominator: float) -> float:
        return (numerator / denominator) * 100

    # Convert the raw count into a percentage of all benchmark questions.
    leaderboard_df["Num Questions Parseable"] = leaderboard_df["Num Questions Parseable"].apply(
        lambda x: fraction_to_percentage(float(x), NUMBER_OF_QUESTIONS)
    )

    def get_params(model_name):
        """Look up the model's parameter count in the metadata; NaN if missing."""
        if model_name in metadata:
            return metadata[model_name]
        print(model_name)
        return numpy.nan

    leaderboard_df["Params"] = leaderboard_df["Model Path"].apply(get_params)

    # Reorder columns for display.
    leaderboard_df = leaderboard_df[
        ["Model Path", "Params", "Benchmark Score", "Num Questions Parseable", "Error"]
    ]

    # Failed runs have no usable score.
    leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].replace(
        "FAILED", numpy.nan
    )

    # Scale the score by the fraction of questions that were parseable.
    leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].astype(float) * (
        leaderboard_df["Num Questions Parseable"].astype(float) / 100
    )

    leaderboard_df["Num Questions Parseable"] = leaderboard_df["Num Questions Parseable"].astype(float)

    # Clamp negative scores to 0.
    leaderboard_df.loc[leaderboard_df["Benchmark Score"] < 0, "Benchmark Score"] = 0

    # Sort by score, then by parseable percentage, both descending.
    leaderboard_df = leaderboard_df.sort_values(
        by=["Benchmark Score", "Num Questions Parseable"], ascending=[False, False]
    )

    # Print model names and scores to the console before HTML formatting.
    print("\n===== MODEL RESULTS =====")
    for _, row in leaderboard_df.iterrows():
        print(f"{row['Model Path']}: {row['Benchmark Score']:.2f}")
    print("========================\n")

    # Apply HTML formatting for display.
    leaderboard_df["Model Path"] = leaderboard_df["Model Path"].apply(make_clickable_model)

    leaderboard_df = leaderboard_df.rename(
        columns={
            "Model Path": "Model",
            "Num Questions Parseable": "Percentage Questions Parseable",
        }
    )

    # Gradient coloring: green for better scores; the reversed colormap on
    # Params (capped at 150B) marks larger models in red.
    leaderboard_df_styled = leaderboard_df.style.background_gradient(cmap="RdYlGn")
    leaderboard_df_styled = leaderboard_df_styled.background_gradient(
        cmap="RdYlGn_r", subset=["Params"], vmax=150
    )

    rounding = {
        "Benchmark Score": "{:.2f}",
        "Percentage Questions Parseable": "{:.2f}",
        "Params": "{:.0f}",
    }
    leaderboard_df_styled = leaderboard_df_styled.format(rounding)

    leaderboard_table = gr.components.Dataframe(
        value=leaderboard_df_styled,
        datatype=["markdown", "number", "number", "number", "str"],
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
    )

    # Create and show the performance plot below the table.
    fig = create_performance_plot()
    plot = gr.Plot(value=fig, elem_id="performance-plot")

    gr.Markdown(AUTHORS, elem_classes="markdown-text")

demo.queue(default_concurrency_limit=40).launch()
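
# Usage note (an assumption, not confirmed by this repo: the entry-point file
# name and data-file locations may differ). With this script saved as app.py
# and benchmark_results.csv / metadata.json in the working directory, run:
#
#   python app.py
#
# Gradio serves the leaderboard on http://127.0.0.1:7860 by default.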