louiecerv committed on
Commit
dae7e36
·
1 Parent(s): 81454cf

sync with remote

Browse files
Files changed (2) hide show
  1. app.py +85 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from huggingface_hub import HfApi, HfFolder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
9
+
# Read the advertising data from the CSV file in the working directory and
# render the whole table under its own heading.
dataset_file = 'advertising.csv'
df = pd.read_csv(dataset_file)

st.write("## Advertising Dataset")
st.write(df)
16
+
# One scatter panel per advertising channel, each plotted against Sales,
# laid out side by side in a single figure.
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
for panel, channel in zip(axs, ('TV', 'Radio', 'Newspaper')):
    panel.scatter(df[channel], df['Sales'])
    panel.set_xlabel(channel)
    panel.set_ylabel('Sales')
    panel.set_title(f'{channel} vs Sales')

st.pyplot(fig)
35
+
# Features are the three channel budgets; the target is Sales.
# 20% of the rows are held out for testing, with a fixed seed so the split
# is the same on every run.
X = df[['TV', 'Radio', 'Newspaper']]
y = df['Sales']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the decision tree on the training split (seeded for reproducibility).
regressor = DecisionTreeRegressor(random_state=42)
regressor.fit(X_train, y_train)

# Score the held-out rows and report both metrics in the app.
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

st.write("## Model Performance")
st.write(f"Mean Squared Error: {mse}")
st.write(f"R-squared: {r2}")
56
+
# Serialize the fitted regressor to a pickle file in the working directory;
# `model_path` is reused later as the upload source and destination name.
model_path = 'decision_tree_regressor.pkl'
with open(model_path, 'wb') as model_file:
    pickle.dump(regressor, model_file)
61
+
# Descriptive text shown inside a collapsible "About This App" panel.
about_text = """
This app reads the advertising dataset and trains a decision tree regressor to predict sales based on TV, Radio, and Newspaper advertising budgets.
The dataset is sourced from Kaggle and contains information on advertising budgets and sales for a product.
The app displays scatter plots of TV vs Sales, Radio vs Sales, and Newspaper vs Sales.
After training the model, the app displays the performance metrics of the model.
The trained model is saved as a pickle file for future use.
"""

with st.expander("About This App"):
    st.write(about_text)
71
+
# Publish the pickled model to the Hugging Face Hub.
#
# The access token is read from the HF_TOKEN environment variable. When the
# variable is unset, `token=None` is passed to `upload_file`, which then
# falls back to any locally cached Hugging Face credentials; surface that
# explicitly so an authentication failure is easier to diagnose.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    st.warning(
        "HF_TOKEN is not set; the upload will rely on locally cached "
        "Hugging Face credentials."
    )

# NOTE(review): the repository name says "knn_regressor", but the artifact
# uploaded here is a decision tree regressor pickle - confirm the target repo.
repo_id = "wvsu-dti-aidev-team/advertising_knn_regressor_model"

# The file keeps the same name in the repository as it has on disk.
api = HfApi()
api.upload_file(
    path_or_fileobj=model_path,
    path_in_repo=model_path,
    repo_id=repo_id,
    repo_type="model",
    token=hf_token,
)

# Only reached when upload_file returned without raising.
st.write("Model uploaded successfully!")
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ matplotlib
4
+ scikit-learn
5
+ requests
6
+ huggingface_hub