computerscience-person committed on
Commit
cdffe0b
·
1 Parent(s): 27db1e5

Add comparison to Decision Tree Classifier.

Browse files
Files changed (1) hide show
  1. app.py +30 -41
app.py CHANGED
@@ -47,13 +47,14 @@ def _(pl):
47
 
48
  @app.cell
49
  def _(mo):
50
- mo.md("""## Naive Bayes' Classifier""")
51
  return
52
 
53
 
54
  @app.cell
55
  def _(dataset_prior_conditions, mo, pl):
56
  from sklearn.naive_bayes import BernoulliNB
 
57
  from sklearn.model_selection import train_test_split
58
  from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
59
 
@@ -63,8 +64,12 @@ def _(dataset_prior_conditions, mo, pl):
63
  )
64
 
65
  bnb = BernoulliNB()
 
66
  y_pred_priors = bnb.fit(X_train_priors, y_train_priors).predict(X_test_priors)
67
- mo.md(f"""
 
 
 
68
  Accuracy : {accuracy_score(y_test_priors, y_pred_priors)}
69
 
70
  Confusion Matrix:
@@ -78,9 +83,25 @@ def _(dataset_prior_conditions, mo, pl):
78
  ```
79
  {classification_report(y_test_priors, y_pred_priors)}
80
  ```
81
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  return (
83
  BernoulliNB,
 
84
  X_priors_NB,
85
  X_test_priors,
86
  X_train_priors,
@@ -88,7 +109,9 @@ def _(dataset_prior_conditions, mo, pl):
88
  bnb,
89
  classification_report,
90
  confusion_matrix,
 
91
  train_test_split,
 
92
  y_pred_priors,
93
  y_priors_NB,
94
  y_test_priors,
@@ -97,43 +120,14 @@ def _(dataset_prior_conditions, mo, pl):
97
 
98
 
99
  @app.cell
100
- def _(X_test_priors, pl, y_pred_priors, y_test_priors):
101
- import altair as alt
102
- alt.data_transformers.enable("vegafusion")
103
-
104
- # X_test_priors, y_pred_priors, y_test_priors
105
- dataset_result_priors = pl.concat([X_test_priors, y_test_priors, pl.DataFrame({"Predicted Diabetes_binary": y_pred_priors})], how="horizontal")
106
- dataset_result_priors1 = dataset_result_priors.select(
107
- (pl.col("HighBP") * 8),
108
- (pl.col("HighChol") * 4),
109
- (pl.col("Stroke") * 2),
110
- pl.exclude(["HighBP", "HighChol", "Stroke"])
111
- )
112
- dataset_result_priors1 = dataset_result_priors1.select(
113
- pl.sum_horizontal(pl.col("HighBP", "HighChol", "Stroke", "HeartDiseaseorAttack")),
114
- pl.col("Diabetes_binary", "Predicted Diabetes_binary")
115
- )
116
- dataset_result_priors2 = dataset_result_priors.select(
117
- pl.exclude(["Diabetes_binary", "Predicted Diabetes_binary"]),
118
- (pl.col("Diabetes_binary") * 2),
119
- pl.col("Predicted Diabetes_binary")
120
- )
121
- dataset_result_priors2 = dataset_result_priors2.select(
122
- pl.col("HighBP", "HighChol", "Stroke", "HeartDiseaseorAttack"),
123
- pl.sum_horizontal(pl.col("Diabetes_binary", "Predicted Diabetes_binary"))
124
- )
125
- dataset_result_priors2.head(10)
126
- return (
127
- alt,
128
- dataset_result_priors,
129
- dataset_result_priors1,
130
- dataset_result_priors2,
131
- )
132
 
133
 
134
  @app.cell
135
  def _(mo):
136
- mo.md(r"""# Diabetes Predictor""")
137
  return
138
 
139
 
@@ -165,10 +159,5 @@ def _(bnb, mo, priors_predict):
165
  return diabetes_or_not, prediction
166
 
167
 
168
- @app.cell
169
- def _():
170
- return
171
-
172
-
173
  if __name__ == "__main__":
174
  app.run()
 
47
 
48
  @app.cell
49
  def _(mo):
50
+ mo.md("""## Testing Classifiers""")
51
  return
52
 
53
 
54
  @app.cell
55
  def _(dataset_prior_conditions, mo, pl):
56
  from sklearn.naive_bayes import BernoulliNB
57
+ from sklearn.tree import DecisionTreeClassifier
58
  from sklearn.model_selection import train_test_split
59
  from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
60
 
 
64
  )
65
 
66
  bnb = BernoulliNB()
67
+ dtc = DecisionTreeClassifier()
68
  y_pred_priors = bnb.fit(X_train_priors, y_train_priors).predict(X_test_priors)
69
+ y_pred_dtc = dtc.fit(X_train_priors, y_train_priors).predict(X_test_priors)
70
+ mo.accordion(
71
+ {
72
+ "Bernoulli NB Metrics": f"""
73
  Accuracy : {accuracy_score(y_test_priors, y_pred_priors)}
74
 
75
  Confusion Matrix:
 
83
  ```
84
  {classification_report(y_test_priors, y_pred_priors)}
85
  ```
86
+ """,
87
+ "Decision Tree Classifier": f"""
88
+ Accuracy : {accuracy_score(y_test_priors, y_pred_dtc)}
89
+
90
+ Confusion Matrix:
91
+
92
+ ```
93
+ {confusion_matrix(y_test_priors, y_pred_dtc)}
94
+ ```
95
+
96
+ Classification Report:
97
+
98
+ ```
99
+ {classification_report(y_test_priors, y_pred_dtc)}
100
+ ```
101
+ """})
102
  return (
103
  BernoulliNB,
104
+ DecisionTreeClassifier,
105
  X_priors_NB,
106
  X_test_priors,
107
  X_train_priors,
 
109
  bnb,
110
  classification_report,
111
  confusion_matrix,
112
+ dtc,
113
  train_test_split,
114
+ y_pred_dtc,
115
  y_pred_priors,
116
  y_priors_NB,
117
  y_test_priors,
 
120
 
121
 
122
  @app.cell
123
+ def _(mo):
124
+ mo.md(r"""Looks like Bernoulli Naive Bayes' performs better on this dataset, as even though the Decision Tree Classifier has a bit better accuracy, the other metrics do give a better score on the BNB overall.""")
125
+ return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
 
128
  @app.cell
129
  def _(mo):
130
+ mo.md(r"""# Diabetes Predictor using BNB""")
131
  return
132
 
133
 
 
159
  return diabetes_or_not, prediction
160
 
161
 
 
 
 
 
 
162
  if __name__ == "__main__":
163
  app.run()