|
import marimo |
|
|
|
# Version string recorded in the notebook file header.
__generated_with = "0.11.13"

# Application object; each cell below registers itself on it via @app.cell.
app = marimo.App(width="medium")
|
|
|
|
|
@app.cell
def _():
    # Import the notebook-wide libraries and expose them to the other cells
    # under their short aliases.
    import marimo as mo
    import polars as pl

    return (mo, pl)
|
|
|
|
|
@app.cell
def _(pl):
    # Load the raw CSV into a polars DataFrame.
    # Underscore-prefixed names stay private to this cell.
    _csv_path = './dataset/colorectal_cancer_dataset.csv'
    dataset = pl.read_csv(_csv_path)

    # Bare trailing expression: shown as the cell's output.
    dataset
    return (dataset,)
|
|
|
|
|
@app.cell
def _(dataset, pl):
    # Encode the categorical columns:
    #   * 'Obesity_BMI' and 'Cancer_Stage' are one-hot encoded;
    #   * 'Survival_5_years' is ordinal encoded.
    # The encoded columns are then attached to the original frame.
    from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

    # Underscore-prefixed names stay private to this cell.
    _onehot_columns = ['Obesity_BMI', 'Cancer_Stage']
    _ordinal_column = 'Survival_5_years'

    # --- one-hot part -----------------------------------------------------
    encoder = OneHotEncoder(sparse_output=False)
    encoded = encoder.fit_transform(dataset.select(_onehot_columns))
    encoded_features = encoder.get_feature_names_out(_onehot_columns)
    # Every generated column is declared as Int8 in the polars schema.
    encoded_schema = dict.fromkeys(encoded_features, pl.Int8)
    dataset_encoded_parts = pl.DataFrame(encoded, schema=encoded_schema)

    # --- ordinal part -----------------------------------------------------
    ord_encoder = OrdinalEncoder()
    ord_encoded = ord_encoder.fit_transform(dataset.select(_ordinal_column))
    ord_encoded_features = ord_encoder.get_feature_names_out([_ordinal_column])
    ord_encoded_schema = dict.fromkeys(ord_encoded_features, pl.Int8)
    dataset_ord_encoded_parts = pl.DataFrame(
        ord_encoded, schema=ord_encoded_schema
    )

    # --- combine ----------------------------------------------------------
    # NOTE(review): the ordinal output presumably keeps the name
    # 'Survival_5_years', so it would replace the original column here -
    # confirm against the rendered frame.
    dataset_encoded = dataset.with_columns(dataset_encoded_parts).with_columns(
        dataset_ord_encoded_parts
    )

    # Bare trailing expression: shown as the cell's output.
    dataset_encoded
    return (
        OneHotEncoder,
        OrdinalEncoder,
        dataset_encoded,
        dataset_encoded_parts,
        dataset_ord_encoded_parts,
        encoded,
        encoded_features,
        encoded_schema,
        encoder,
        ord_encoded,
        ord_encoded_features,
        ord_encoded_schema,
        ord_encoder,
    )
|
|
|
|
|
@app.cell
def _(dataset_encoded, encoded_features, mo):
    # Train three classifiers (logistic regression, Bernoulli naive Bayes,
    # decision tree) to predict 'Survival_5_years' from 'Age',
    # 'Tumor_Size_mm' and the one-hot encoded columns, then render accuracy,
    # precision, confusion matrix and classification report for each model.
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, precision_score, classification_report, confusion_matrix

    X = dataset_encoded.select(['Age', 'Tumor_Size_mm'] + encoded_features.tolist())
    y = dataset_encoded.select(['Survival_5_years'])
    # 80/20 split with a fixed seed so the split is reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=33)

    # y_train is a single-column frame, i.e. an (n, 1) column vector.
    # scikit-learn estimators expect a 1-D target and warn
    # (DataConversionWarning) when handed a column vector, so fit on a
    # flattened copy. The exported y / y_train / y_test stay unchanged.
    # The underscore prefix keeps the helper name private to this cell.
    _y_train_1d = y_train.to_numpy().ravel()

    logreg = LogisticRegression()
    y_pred_logreg = logreg.fit(X_train, _y_train_1d).predict(X_test)
    bnb = BernoulliNB()
    y_pred_bnb = bnb.fit(X_train, _y_train_1d).predict(X_test)
    dectree = DecisionTreeClassifier()
    y_pred_dectree = dectree.fit(X_train, _y_train_1d).predict(X_test)

    def _model_report(title, y_pred):
        """Return the markdown section for one model's test-set metrics.

        The section is assembled from a list of lines rather than an
        indented triple-quoted literal, so the multi-line confusion matrix
        and classification report cannot interfere with the literal's
        indentation.
        """
        return "\n".join(
            [
                f"# {title}",
                "",
                f"Accuracy score: {accuracy_score(y_test, y_pred)}",
                "",
                f"Precision score: {precision_score(y_test, y_pred)}",
                "",
                "Confusion matrix:",
                "```",
                str(confusion_matrix(y_test, y_pred)),
                "```",
                "",
                "Classification report:",
                "```",
                classification_report(y_test, y_pred),
                "```",
            ]
        )

    # One section per model, separated by a blank line.
    # Bare trailing expression: shown as the cell's output.
    mo.md(
        "\n\n".join(
            [
                _model_report("Logistic Regression", y_pred_logreg),
                _model_report("Bernoulli Naive Bayes", y_pred_bnb),
                _model_report("Decision Tree Classifier", y_pred_dectree),
            ]
        )
    )
    return (
        BernoulliNB,
        DecisionTreeClassifier,
        LogisticRegression,
        X,
        X_test,
        X_train,
        accuracy_score,
        bnb,
        classification_report,
        confusion_matrix,
        dectree,
        logreg,
        precision_score,
        train_test_split,
        y,
        y_pred_bnb,
        y_pred_dectree,
        y_pred_logreg,
        y_test,
        y_train,
    )
|
|
|
|
|
@app.cell
def _(dataset_cluster, mo):
    # Scatter plot of 'Incidence_Rate_per_100K' (y axis) against
    # 'Mortality_Rate_per_100K' (x axis), coloured by the 'Cluster' column.
    # NOTE(review): no cell visible in this file defines dataset_cluster -
    # confirm that a producing cell exists, otherwise this cell cannot run.
    import altair as alt

    chart1 = (
        alt.Chart(dataset_cluster)
        .mark_circle()
        .encode(
            x=alt.X('Mortality_Rate_per_100K'),
            y=alt.Y('Incidence_Rate_per_100K'),
            color='Cluster',
        )
    )

    # Bare trailing expression: shown as the cell's output.
    mo.ui.altair_chart(chart1)
    return alt, chart1
|
|
|
|
|
@app.cell
def _():
    # Empty cell: takes no inputs and defines nothing.
    return
|
|
|
|
|
# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
|
|