import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import pickle
from huggingface_hub import HfApi, HfFolder
import os

# Load the dataset. A missing, empty, or malformed CSV is reported in the app
# with the same "Error: ..." + stop pattern used elsewhere in this script,
# instead of surfacing as a raw traceback.
try:
    df = pd.read_csv('advertising.csv')
except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
    st.error(f"Error: {e}")
    st.stop()

# Display the dataset
st.write("## Advertising Dataset")
st.write(df)

# Scatter plots: one panel per advertising channel, each plotted against Sales.
channels = ['TV', 'Radio', 'Newspaper']
fig, axs = plt.subplots(1, len(channels), figsize=(15, 5))
for ax, channel in zip(axs, channels):
    ax.scatter(df[channel], df['Sales'])
    ax.set_xlabel(channel)
    ax.set_ylabel('Sales')
    ax.set_title(f'{channel} vs Sales')

st.pyplot(fig)

# The script is re-executed on every Streamlit rerun; figures created through
# pyplot stay registered until closed, so release this one once it is rendered.
plt.close(fig)

# Build the feature matrix (the three ad budgets) and the target (Sales),
# then hold out 20% of the rows for testing with a fixed seed.
feature_columns = ['TV', 'Radio', 'Newspaper']
X = df[feature_columns]
y = df['Sales']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a decision tree on the training split; fit() returns the estimator
# itself, so construction and training can be chained.
regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Run the fitted tree on the held-out test split
y_pred = regressor.predict(X_test)

# Score the predictions against the true test targets
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Show both scores under their own heading in the app
st.write("## Model Performance")
st.write(f"Mean Squared Error: {mse}")
st.write(f"R-squared: {r2}")

# Serialize the fitted regressor to disk; the same path is reused later
# as both the local file and the in-repo name for the upload.
model_path = 'decision_tree_regressor.pkl'
with open(model_path, mode='wb') as model_file:
    pickle.dump(regressor, model_file)

# Collapsible "About" panel describing what the app does
about_text = """
    This app reads the advertising dataset and trains a decision tree regressor to predict sales based on TV, Radio, and Newspaper advertising budgets.
    The dataset is sourced from Kaggle and contains information on advertising budgets and sales for a product.
    The app displays scatter plots of TV vs Sales, Radio vs Sales, and Newspaper vs Sales.
    After training the model, the app displays the performance metrics of the model.
    The trained model is saved as a pickle file for future use.
    """
with st.expander("About This App"):
    st.write(about_text)

# Upload the pickled model to the Hugging Face Hub using huggingface_hub.
# The access token is read from the HF_TOKEN environment variable; os.getenv
# returns None (it does not raise) when the variable is unset.
hf_token = os.getenv("HF_TOKEN")
# NOTE(review): the repo name says "knn" but the uploaded model is a decision
# tree regressor — confirm this is the intended target repository.
repo_id = "wvsu-dti-aidev-team/advertising_knn_regressor_model"

# The upload is the step that can actually fail (authentication, missing repo,
# network), so the error handling wraps it. Any failure is reported in the app
# and the script is halted before the success message is shown.
try:
    api = HfApi()
    api.upload_file(
        path_or_fileobj=model_path,
        path_in_repo=model_path,
        repo_id=repo_id,
        repo_type="model",
        token=hf_token,
    )
except Exception as e:
    st.error(f"Error: {e}")
    st.stop()

st.write("Model uploaded successfully!")