import os
import re
from datetime import datetime

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, HfFolder, hf_hub_download

LEADERBOARD_FILE = "leaderboard.csv"
GROUND_TRUTH_FILE = "ground_truth.csv"
LAST_UPDATED = datetime.now().strftime("%B %d, %Y")

# Ensure authentication and suppress the symlink warning on download
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set or invalid.")


def initialize_leaderboard_file():
    """Ensure the leaderboard file exists and has the correct headers."""
    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
        pd.DataFrame(columns=[
            "Model Name", "Overall Accuracy", "Valid Accuracy",
            "Correct Predictions", "Total Questions", "Timestamp",
        ]).to_csv(LEADERBOARD_FILE, index=False)


def clean_answer(answer):
    """Normalize a free-form answer to a single letter A-D, or None if unparseable."""
    if pd.isna(answer):
        return None
    answer = str(answer)
    clean = re.sub(r'[^A-Da-d]', '', answer)
    return clean[0].upper() if clean else None


def update_leaderboard(results):
    """Append a submission to the leaderboard CSV and push it to the Hugging Face Space."""
    new_entry = {
        "Model Name": results['model_name'],
        "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
        "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
        "Correct Predictions": results['correct_predictions'],
        "Total Questions": results['total_questions'],
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    try:
        # Update the local leaderboard file
        new_entry_df = pd.DataFrame([new_entry])
        file_exists = os.path.exists(LEADERBOARD_FILE)
        new_entry_df.to_csv(
            LEADERBOARD_FILE,
            mode='a',                # append mode
            index=False,
            header=not file_exists,  # write the header only if the file is new
        )
        print(f"Leaderboard updated successfully at {LEADERBOARD_FILE}")

        # Push the updated file to the Hugging Face repository over the HTTP API
        api = HfApi()
        token = HfFolder.get_token()
        api.upload_file(
            path_or_fileobj=LEADERBOARD_FILE,
            path_in_repo="leaderboard.csv",
            repo_id="SondosMB/ss",  # the Space repository hosting this app
            repo_type="space",
            token=token,
        )
        print("Leaderboard changes pushed to Hugging Face repository.")
    except Exception as e:
        print(f"Error updating leaderboard file: {e}")


def load_leaderboard():
    """Return the leaderboard as a DataFrame, or an empty frame if none exists yet."""
    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
        return pd.DataFrame({
            "Model Name": [],
            "Overall Accuracy": [],
            "Valid Accuracy": [],
            "Correct Predictions": [],
            "Total Questions": [],
            "Timestamp": [],
        })
    return pd.read_csv(LEADERBOARD_FILE)


def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
    """Score an uploaded predictions CSV against the ground truth and optionally
    record the result on the leaderboard."""
    try:
        ground_truth_path = hf_hub_download(
            repo_id="SondosMB/ground-truth-dataset",
            filename=GROUND_TRUTH_FILE,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        ground_truth_df = pd.read_csv(ground_truth_path)
    except FileNotFoundError:
        return "Ground truth file not found in the dataset repository.", load_leaderboard()
    except Exception as e:
        return f"Error loading ground truth: {e}", load_leaderboard()

    if not prediction_file:
        return "Prediction file not uploaded.", load_leaderboard()

    try:
        predictions_df = pd.read_csv(prediction_file.name)
        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
        valid_predictions = merged_df.dropna(subset=['pred_answer'])
        correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
        total_predictions = len(merged_df)
        total_valid_predictions = len(valid_predictions)

        # Overall accuracy counts unparseable answers as wrong; valid accuracy ignores them.
        overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
        valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0

        results = {
            'model_name': model_name if model_name else "Unknown Model",
            'overall_accuracy': overall_accuracy,
            'valid_accuracy': valid_accuracy,
            'correct_predictions': correct_predictions,
            'total_questions': total_predictions,
        }
        if add_to_leaderboard:
            update_leaderboard(results)
            return "Evaluation completed and added to leaderboard.", load_leaderboard()
        else:
            return "Evaluation completed but not added to leaderboard.", load_leaderboard()
    except Exception as e:
        return f"Error during evaluation: {str(e)}", load_leaderboard()


initialize_leaderboard_file()
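
# A minimal, optional sanity check for the scoring helpers above (not part of the
# app flow). The toy question_id values, predictions, and ground-truth letters are
# hypothetical; they only illustrate how clean_answer() normalizes free-form answers
# and how overall vs. valid accuracy differ when some answers cannot be parsed.
# Call _demo_scoring_sketch() manually (e.g. from a REPL) to run it.
def _demo_scoring_sketch():
    sample = pd.DataFrame({
        "question_id": [101, 102, 103, 104],
        "predicted_answer": ["A", "b)", " C. ", "not sure"],
    })
    sample["pred_answer"] = sample["predicted_answer"].apply(clean_answer)
    print(sample["pred_answer"].tolist())  # ['A', 'B', 'C', None]

    truth = pd.Series(["A", "B", "D", "A"])  # hypothetical ground-truth answers
    valid = sample["pred_answer"].notna()
    correct = (sample["pred_answer"][valid] == truth[valid]).sum()
    print(correct / len(sample))  # overall accuracy: 2 / 4 = 0.50
    print(correct / valid.sum())  # valid accuracy:   2 / 3 ≈ 0.67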

# Custom CSS to match a modern, professional website style
css_tech_theme = """
body {
    font-family: 'Roboto', sans-serif;
    background-color: #f4f6fa;
    color: #333333;
    line-height: 1.8;
    margin: 0;
    padding: 0;
}
.center-content {
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    text-align: center;
    margin: 40px auto;
    padding: 20px;
    background: linear-gradient(135deg, #6a1b9a, #64b5f6);
    color: #ffffff;
    border-radius: 10px;
    max-width: 80%;
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
}
.center-content h1 {
    font-size: 3em;
    font-weight: bold;
    margin-bottom: 10px;
}
.center-content h2 {
    font-size: 1.8em;
    margin: 10px 0 20px;
    font-weight: 500;
}
.center-content p {
    font-size: 1.2em;
    margin-bottom: 20px;
    line-height: 1.6;
}
.tabs {
    margin-top: 20px;
}
.gradio-container {
    background: #ffffff;
    border-radius: 10px;
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
    padding: 20px;
    max-width: 1200px;
    margin: 0 auto;
}
#leaderboard {
    margin: 20px auto;
    border-radius: 10px;
    overflow: hidden;
    border: 1px solid #e5eff2;
    background: #f9f9f9;
}
footer {
    text-align: center;
    padding: 20px;
    background: #6a1b9a;
    color: #ffffff;
    margin-top: 20px;
    font-size: 0.9em;
    border-top: 5px solid #64b5f6;
}
"""

# Create the Gradio interface
with gr.Blocks(css=css_tech_theme) as demo:
    gr.Markdown("""
    <div class="center-content">
        <h1>🏆 Mobile-MMLU Benchmark Competition</h1>
        <h2>🌟 Welcome to the Competition</h2>
        <p>
            Welcome to the Mobile-MMLU Benchmark Competition. Submit your predictions,
            view the leaderboard, and track your performance!
        </p>
    </div>
    """)

    with gr.Tabs(elem_id="tabs"):
        with gr.TabItem("📖 Overview"):
            gr.Markdown("""
            ## About the Competition

            **Mobile-MMLU** evaluates mobile-optimized LLMs on 16,186 scenario-based and factual questions across 80 fields.

            Test your model, submit predictions, and climb the leaderboard!
            """)

        with gr.TabItem("📤 Submission"):
            with gr.Row():
                file_input = gr.File(label="Upload Prediction CSV", file_types=[".csv"], interactive=True)
                model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
            with gr.Row():
                overall_accuracy_display = gr.Number(label="Overall Accuracy", interactive=False)
                add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)

            eval_button = gr.Button("Evaluate")
            eval_status = gr.Textbox(label="Evaluation Status", interactive=False)

            def handle_evaluation(file, model_name, add_to_leaderboard):
                status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard)
                # Report the accuracy of the newest leaderboard entry (0 if the board is empty).
                overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"] if not leaderboard.empty else 0
                return status, overall_accuracy

            eval_button.click(
                handle_evaluation,
                inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
                outputs=[eval_status, overall_accuracy_display],
            )

        with gr.TabItem("🏅 Leaderboard"):
            leaderboard_table = gr.Dataframe(
                value=load_leaderboard(),
                label="Leaderboard",
                interactive=False,
                wrap=True,
            )
            refresh_button = gr.Button("Refresh Leaderboard")
            refresh_button.click(
                load_leaderboard,
                inputs=[],
                outputs=[leaderboard_table],
            )

    gr.Markdown(f"Last updated on **{LAST_UPDATED}**")

demo.launch()