# -*- coding: utf-8 -*-

# author: Martin Fajčík
# modified by: Jan Doležal

import csv
import random
import numpy as np
from bokeh.plotting import figure
from bokeh.models import LabelSet, LogScale, ColumnDataSource, tickers
from bokeh.models import LinearColorMapper, HoverTool
from bokeh.palettes import Turbo256  # A color palette with enough colors

def fit_curve(x, y, degree=1):
    """Fit a polynomial to the points and sample it across the x-range.

    Args:
        x: x-coordinates of the data points.
        y: y-coordinates of the data points.
        degree: Degree of the polynomial to fit (default 1, a straight line).

    Returns:
        A tuple ``(x_fit, y_fit)``: 100 evenly spaced x-values running from
        ``min(x)`` to ``max(x)``, and the fitted polynomial evaluated at them.
    """
    # Least-squares fit wrapped in a callable polynomial.
    fitted_polynomial = np.poly1d(np.polyfit(x, y, degree))

    # Sample the polynomial only over the span covered by the data.
    lowest, highest = min(x), max(x)
    sample_x = np.linspace(lowest, highest, 100)
    return sample_x, fitted_polynomial(sample_x)

def remove_outliers(x, y):
    """Split paired samples into inliers and outliers using the IQR rule.

    A point is kept only when BOTH its x and its y value lie within
    ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` of their respective coordinate.

    Args:
        x: x-coordinates of the points.
        y: y-coordinates of the points (same length as ``x``).

    Returns:
        A tuple ``(x_inliers, y_inliers, x_outliers, y_outliers)`` of NumPy
        arrays; the original order of the points is preserved in each group.
    """
    xs = np.array(x)
    ys = np.array(y)

    def inside_iqr_fence(values):
        # Boolean mask of the values lying within 1.5 * IQR of the quartiles.
        first_quartile, third_quartile = np.percentile(values, [25, 75])
        spread = third_quartile - first_quartile
        lower_fence = first_quartile - 1.5 * spread
        upper_fence = third_quartile + 1.5 * spread
        return (values >= lower_fence) & (values <= upper_fence)

    # A point is an inlier only if neither coordinate is flagged.
    keep = inside_iqr_fence(xs) & inside_iqr_fence(ys)
    drop = ~keep

    return xs[keep], ys[keep], xs[drop], ys[drop]

def get_ldb_records(name_map, csv_file_path):
    """Load CSV rows keyed by model title.

    Args:
        name_map: Mapping whose values are the accepted model titles.
        csv_file_path: Path to a CSV file with a header row that contains a
            ``Model`` column.

    Returns:
        A dict mapping each model title found in the file to its CSV row
        (a dict of column name to string value). When a title occurs in
        several rows, the last row wins.

    Raises:
        KeyError: If a row's ``Model`` value is not among the values of
            ``name_map``, or the file has no ``Model`` column.
    """
    known_titles = set(name_map.values())

    ldb_records = {}
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; otherwise newlines embedded in quoted fields
    # are translated before the parser sees them.
    # NOTE(review): the file is decoded with the platform default encoding -
    # confirm whether an explicit encoding should be passed.
    with open(csv_file_path, mode='r', newline='') as file:
        for row in csv.DictReader(file):
            model_title = row['Model']
            if model_title not in known_titles:
                # Unknown titles are an error, not something to skip silently.
                raise KeyError(model_title)
            ldb_records[model_title] = row

    return ldb_records

def create_scatter_plot_with_curve_with_variances_named(category, variance_across_categories, x, y, sizes, model_names, ldb_records):
    """Build a scatter plot of per-model scores with a fitted trend line.

    Every point is one model. The marker size encodes the model's variance,
    the marker shape encodes the model type, and each point is labelled with
    the model name. Points flagged by ``remove_outliers`` are still drawn,
    but the dashed trend line is fitted to the inliers only.

    Args:
        category: Name used as the y-axis label.
        variance_across_categories: Mapping of model name to variance.
            Models missing from the mapping are treated as variance 0.
        x: x-coordinate of each model (axis label: model size in billions
            of parameters); drawn on a logarithmic scale.
        y: y-coordinate (performance) of each model.
        sizes: Per-model value shown in the tooltip as
            "Model Size (B parameters)".
        model_names: Name of each model, aligned with ``x``, ``y``, ``sizes``.
        ldb_records: Mapping of model name to a record with a ``'Type'``
            entry (``'chat'``, ``'pretrained'``, ``'ensemble'``; any other
            value is drawn as a diamond).

    Returns:
        The configured Bokeh figure.

    Raises:
        KeyError: If a model name has no entry in ``ldb_records``.
    """
    FONTSIZE = 12

    # Range of marker sizes that variances are mapped onto.
    min_marker_size = 5
    max_marker_size = 30

    # Remove outliers
    x_filtered, y_filtered, x_outliers, y_outliers = remove_outliers(x, y)

    # Recover the per-point inlier mask. remove_outliers decides from the
    # (x, y) values alone, so equal pairs always land in the same group and
    # pair membership reproduces its mask exactly. Matching on x alone would
    # put two models that share an x value into both groups whenever only
    # one of them is an outlier, misaligning names, sizes and symbols.
    inlier_pairs = set(zip(np.asarray(x_filtered).tolist(), np.asarray(y_filtered).tolist()))
    inlier_mask = np.array(
        [pair in inlier_pairs for pair in zip(np.array(x).tolist(), np.array(y).tolist())],
        dtype=bool,
    )
    outlier_mask = ~inlier_mask

    names = np.array(model_names)
    model_sizes = np.array(sizes)

    # Variance range is loop-invariant: compute it once.
    known_variances = list(variance_across_categories.values())
    variance_low = min(known_variances) if known_variances else 0
    variance_high = max(known_variances) if known_variances else 0
    variance_span = variance_high - variance_low

    def scale_variance_to_size(variance):
        # Linear mapping of variance onto [min_marker_size, max_marker_size].
        if variance_span == 0:
            # All variances are equal: use a mid-sized marker instead of
            # dividing by zero.
            return (min_marker_size + max_marker_size) / 2
        size = min_marker_size + (variance - variance_low) * (max_marker_size - min_marker_size) / variance_span
        # Models without a variance default to 0, which may lie below the
        # observed minimum; clamp so a marker never gets a tiny or negative size.
        return min(max(size, min_marker_size), max_marker_size)

    # Assign symbols to the model types
    # https://docs.bokeh.org/en/latest/docs/examples/basic/scatters/markers.html
    _model_type2symbol = {
        'chat': 'circle',
        'pretrained': 'triangle',
        'ensemble': 'star',
    }

    # Define a color palette with roughly one color per model. The stride is
    # kept at 1 or more so the slice stays valid with more than 256 models.
    stride = max(1, len(Turbo256) // max(1, len(model_names)))
    color_palette = list(Turbo256[::stride])
    random.shuffle(color_palette)

    def build_source(mask, xs, ys, color_offset):
        # Assemble the ColumnDataSource for one group of points (inliers or
        # outliers); every column is selected with the same mask, so all
        # columns stay aligned with xs / ys.
        group_names = names[mask]
        group_variances = [variance_across_categories.get(mname, 0) for mname in group_names]
        return ColumnDataSource(data={
            'x': xs,
            'y': ys,
            'sizes': model_sizes[mask],  # Keep original model sizes
            'marker_sizes': [scale_variance_to_size(var) for var in group_variances],
            'model_names': group_names,
            'variance': group_variances,
            'color': [color_palette[(i + color_offset) % len(color_palette)] for i in range(len(xs))],
            'symbol': [_model_type2symbol.get(ldb_records[mname]['Type'], 'diamond') for mname in group_names],
        })

    # Outlier colors continue where the inlier colors stop, so colors stay
    # unique while the palette lasts.
    source_filtered = build_source(inlier_mask, x_filtered, y_filtered, 0)
    source_outliers = build_source(outlier_mask, x_outliers, y_outliers, len(x_filtered))

    # Create a figure for the category
    p = figure(
        output_backend="svg",
        sizing_mode="stretch_width",
        height=800,
        tools="pan,wheel_zoom,box_zoom,save,reset",
        active_scroll="wheel_zoom",
        tooltips=[
            ("Model", "@model_names"),
            ("Model Size (B parameters)", "@sizes"),
            ("Variance", "@variance"),
            ("Performance", "@y"),
        ]
    )

    # Plot inliers first, then outliers, with per-point color, size and symbol
    for source in (source_filtered, source_outliers):
        p.scatter('x', 'y', size='marker_sizes', source=source, fill_alpha=0.6, color='color', marker='symbol')

    # Fit and plot a trend line through the inliers (degree 1 = straight line)
    x_fit, y_fit = fit_curve(x_filtered, y_filtered, degree=1)
    p.line(x_fit, y_fit, line_color='gray', line_width=2, line_dash='dashed')

    # Add model-name labels (with slight offset to avoid covering the marker)
    for source in (source_filtered, source_outliers):
        p.add_layout(LabelSet(
            x='x',
            y='y',
            text='model_names',
            source=source,
            x_offset=5,
            y_offset=8,
            text_font_size=f"{FONTSIZE-2}pt",
            text_color='black',
        ))

    # Set axis labels
    p.xaxis.axis_label = 'Model Size (B parameters)'
    p.yaxis.axis_label = f'{category}'

    # Set axis label font sizes
    p.xaxis.axis_label_text_font_size = f"{FONTSIZE}pt"
    p.yaxis.axis_label_text_font_size = f"{FONTSIZE}pt"

    # Set tick label font sizes
    p.xaxis.major_label_text_font_size = f"{FONTSIZE}pt"
    p.yaxis.major_label_text_font_size = f"{FONTSIZE}pt"

    # Logarithmic x-axis with matching tick placement
    p.x_scale = LogScale()
    p.xaxis.ticker = tickers.LogTicker()

    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"

    return p

def create_heatmap(data_matrix, original_scores,
    selected_rows=None,
    hide_scores_tasks=None,
    plot_width=None,
    plot_height=None,
    x_axis_label="Model",
    y_axis_label="Task",
    x_axis_visible=True,
    y_axis_visible=True,
):
    """Build a heatmap with one cell per (row label, column label) pair.

    Row labels of ``data_matrix`` (models) go on the x-axis and column labels
    (tasks) on the y-axis. Cell colors come from ``data_matrix`` through a
    Viridis color mapper spanning 0 to 1; the number printed in each cell
    and the "DS" hover entry come from ``original_scores``.

    Args:
        data_matrix: DataFrame-like table of the values used for coloring.
        original_scores: Table with the same row/column labels holding the
            scores to display (rounded in the cell, unrounded on hover).
        selected_rows: Optional selector applied to both tables as
            ``table[selected_rows]`` before plotting.
            NOTE(review): on a pandas DataFrame this form selects rows only
            for boolean masks and slices - confirm against callers.
        hide_scores_tasks: Column labels (tasks) whose cells get no printed
            score. ``None`` (default) hides nothing.
        plot_width: Figure width; derived from the number of plotted rows
            when ``None``.
        plot_height: Figure height; derived from the number of plotted
            columns when ``None``.
        x_axis_label: Label of the x-axis, also used in the hover tooltip.
        y_axis_label: Label of the y-axis, also used in the hover tooltip.
        x_axis_visible: Whether the x-axis is drawn.
        y_axis_visible: Whether the y-axis is drawn.

    Returns:
        The configured Bokeh figure.
    """
    FONTSIZE = 9

    cell_size = 22
    cell_padding = 3
    cell_width_border = 1 + cell_padding
    # Extra height added when the x-axis is shown - presumably room for the
    # vertical tick labels; TODO confirm.
    x_axis_extra_height = 242

    if hide_scores_tasks is None:
        hide_scores_tasks = ()

    if selected_rows is not None:
        # Restrict both tables to the requested selection
        data_matrix = data_matrix[selected_rows]
        original_scores = original_scores[selected_rows]

    # Size the figure from what is actually plotted, i.e. after the
    # selection has been applied.
    n_rows, n_cols = data_matrix.shape
    if plot_width is None:
        plot_width = n_rows * cell_size + (n_rows * cell_width_border if y_axis_visible else 0)
    if plot_height is None:
        plot_height = n_cols * cell_size + (x_axis_extra_height if x_axis_visible else 0)

    # Set up the figure with models (row labels) on the x-axis and tasks
    # (column labels) on the y-axis
    p = figure(
        output_backend="svg",
        sizing_mode="fixed",
        width=plot_width,
        height=plot_height,
        x_range=list(data_matrix.index),
        y_range=list(data_matrix.columns),
        toolbar_location="below",
        tools="pan,wheel_zoom,box_zoom,reset,save",
        active_drag=None,
        x_axis_label=x_axis_label,
        y_axis_label=y_axis_label,
    )

    # Color mapper for the heatmap: Viridis runs from dark (low values) to
    # light (high values)
    color_mapper = LinearColorMapper(palette='Viridis256', low=0, high=1)

    # Flatten the matrix for Bokeh plotting
    heatmap_data = {
        'x': [],
        'y': [],
        'colors': [],
        'model_names': [],
        'scores': [],
    }
    label_data = {
        'x': [],
        'y': [],
        'value': [],
        'text_color': [],
    }

    # Iterate through the data_matrix to populate heatmap and label data
    for model_name, task_scores in data_matrix.iterrows():
        for task_name, plot_score in task_scores.items():
            original_score = original_scores.loc[model_name, task_name]

            heatmap_data['x'].append(model_name)  # Model goes to x-axis
            heatmap_data['y'].append(task_name)  # Task goes to y-axis
            heatmap_data['colors'].append(plot_score)
            heatmap_data['model_names'].append(model_name)
            heatmap_data['scores'].append(original_score)

            if task_name in hide_scores_tasks:
                continue

            label_data['x'].append(model_name)
            label_data['y'].append(task_name)
            label_data['value'].append(round(original_score))
            # Low values get a dark cell, so print them in white; higher
            # values get a light cell and are printed in black.
            label_data['text_color'].append('white' if plot_score <= 0.6 else 'black')

    heatmap_source = ColumnDataSource(heatmap_data)
    label_source = ColumnDataSource(label_data)

    # Create the heatmap
    p.rect(x='x', y='y', width=1, height=1, source=heatmap_source,
           line_color=None, fill_color={'field': 'colors', 'transform': color_mapper})

    # Add HoverTool for interactivity
    hover = HoverTool()
    hover.tooltips = [(x_axis_label, "@x"), (y_axis_label, "@y"), ("DS", "@scores")]
    p.add_tools(hover)

    # Add score labels with per-cell text color
    labels = LabelSet(x='x', y='y', text='value', source=label_source,
                      text_color='text_color', text_align='center', text_baseline='middle',
                      text_font_size=f"{FONTSIZE}pt")
    p.add_layout(labels)

    # Customize the plot appearance
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.xaxis.major_label_orientation = "vertical"
    p.yaxis.major_label_text_font_size = f"{FONTSIZE}pt"
    p.xaxis.major_label_text_font_size = f"{FONTSIZE}pt"

    # Set the axis label font size and style
    p.xaxis.axis_label_text_font_size = f"{FONTSIZE + 5}pt"
    p.yaxis.axis_label_text_font_size = f"{FONTSIZE + 5}pt"
    p.xaxis.axis_label_text_font_style = "normal"
    p.yaxis.axis_label_text_font_style = "normal"

    # Show or hide each axis as requested
    p.xaxis.visible = x_axis_visible
    p.yaxis.visible = y_axis_visible

    return p

# EOF