"""Streamlit app: explore the advertising dataset and train a decision tree.

The script reads ``advertising.csv``, shows the raw table and three scatter
plots against ``Sales``, trains a ``DecisionTreeRegressor`` on an 80/20
split, reports MSE and R-squared on the held-out part, pickles the fitted
model, and uploads the pickle file to a Hugging Face model repository.
"""

import os
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from huggingface_hub import HfApi, HfFolder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Load the dataset
df = pd.read_csv('advertising.csv')

# Display the dataset
st.write("## Advertising Dataset")
st.write(df)

# Scatter plots: one panel per advertising channel, each against Sales
feature_columns = ['TV', 'Radio', 'Newspaper']
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
for ax, column in zip(axs, feature_columns):
    ax.scatter(df[column], df['Sales'])
    ax.set_xlabel(column)
    ax.set_ylabel('Sales')
    ax.set_title(f'{column} vs Sales')
st.pyplot(fig)
# Streamlit re-executes this script on every interaction; without an
# explicit close, each run leaves another figure in pyplot's registry.
plt.close(fig)

# Split the data into training and testing sets
X = df[feature_columns]
y = df['Sales']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the decision tree regressor
regressor = DecisionTreeRegressor(random_state=42)
regressor.fit(X_train, y_train)

# Predict on the test set
y_pred = regressor.predict(X_test)

# Calculate performance metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Display performance metrics
st.write("## Model Performance")
st.write(f"Mean Squared Error: {mse}")
st.write(f"R-squared: {r2}")

# Save the trained model as a pickle file
model_path = 'decision_tree_regressor.pkl'
with open(model_path, 'wb') as f:
    pickle.dump(regressor, f)

# About this app section
with st.expander("About This App"):
    st.write(
        " This app reads the advertising dataset and trains a decision tree "
        "regressor to predict sales based on TV, Radio, and Newspaper "
        "advertising budgets. The dataset is sourced from Kaggle and "
        "contains information on advertising budgets and sales for a "
        "product. \n"
        "The app displays scatter plots of TV vs Sales, Radio vs Sales, and "
        "Newspaper vs Sales. After training the model, the app displays the "
        "performance metrics of the model. The trained model is saved as a "
        "pickle file for future use. "
    )

# Upload the trained model to Hugging Face using huggingface_hub.
# os.getenv returns None (it does not raise) when HF_TOKEN is unset.
# NOTE(review): with token=None huggingface_hub is documented to fall back
# to locally saved credentials - confirm that fallback is wanted here.
hf_token = os.getenv("HF_TOKEN")
# NOTE(review): the repo name says "knn" but the uploaded model is a
# decision tree - confirm this is the intended repository.
repo_id = "wvsu-dti-aidev-team/advertising_knn_regressor_model"

try:
    # The upload is the step that can actually fail (auth, network,
    # missing repo), so it is the call guarded by the handler.
    api = HfApi()
    api.upload_file(
        path_or_fileobj=model_path,
        path_in_repo=model_path,
        repo_id=repo_id,
        repo_type="model",
        token=hf_token,
    )
except Exception as e:
    st.error(f"Error: {e}")
    st.stop()
else:
    # Report success only when the upload really completed.
    st.write("Model uploaded successfully!")