computerscience-person's picture
New AI app.
ff9549f
raw
history blame
4.39 kB
import marimo
# marimo version string recorded for this notebook file.
__generated_with = "0.11.13"
# Notebook application object; every `@app.cell` function below registers one cell on it.
app = marimo.App(width="medium")
@app.cell
def _():
    # Import the libraries shared across the notebook and return them so that
    # cells declaring `mo` / `pl` as parameters can use them.
    import marimo as mo
    import polars as pl
    return mo, pl
@app.cell
def _(pl):
    # Read the colorectal cancer CSV; the path is relative, so it is resolved
    # against the process's working directory.
    dataset = pl.read_csv('./dataset/colorectal_cancer_dataset.csv')
    # Bare expression: presumably what marimo renders as this cell's output.
    dataset
    return (dataset,)
@app.cell
def _(dataset, pl):
    # Encode the categorical columns with scikit-learn and merge the encoded
    # columns back into the dataset frame.
    from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

    # Underscore-prefixed names are meant to stay local to this cell; they are
    # deliberately left out of the return tuple.
    _onehot_columns = ['Obesity_BMI', 'Cancer_Stage']
    _ordinal_columns = ['Survival_5_years']

    # One-hot pipeline: dense output -> generated feature names -> Int8 frame.
    encoder = OneHotEncoder(sparse_output=False)
    encoded = encoder.fit_transform(dataset.select(_onehot_columns))
    encoded_features = encoder.get_feature_names_out(_onehot_columns)
    encoded_schema = dict.fromkeys(encoded_features, pl.Int8)
    dataset_encoded_parts = pl.DataFrame(encoded, schema=encoded_schema)

    # Ordinal pipeline for the survival column, built the same way.
    ord_encoder = OrdinalEncoder()
    ord_encoded = ord_encoder.fit_transform(dataset.select(_ordinal_columns))
    ord_encoded_features = ord_encoder.get_feature_names_out(_ordinal_columns)
    ord_encoded_schema = dict.fromkeys(ord_encoded_features, pl.Int8)
    dataset_ord_encoded_parts = pl.DataFrame(ord_encoded, schema=ord_encoded_schema)

    # Merge both encoded frames into the original dataset.
    # NOTE(review): the ordinal frame presumably reuses the name
    # 'Survival_5_years' and therefore overwrites the raw column - confirm.
    _with_onehot = dataset.with_columns(dataset_encoded_parts)
    dataset_encoded = _with_onehot.with_columns(dataset_ord_encoded_parts)
    dataset_encoded
    return (
        OneHotEncoder,
        OrdinalEncoder,
        dataset_encoded,
        dataset_encoded_parts,
        dataset_ord_encoded_parts,
        encoded,
        encoded_features,
        encoded_schema,
        encoder,
        ord_encoded,
        ord_encoded_features,
        ord_encoded_schema,
        ord_encoder,
    )
@app.cell
def _(dataset_encoded, encoded_features, mo):
    # Train three classifiers on the encoded dataset and render their
    # evaluation metrics as one Markdown report (this cell's output).
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, precision_score, classification_report, confusion_matrix

    # Features: two numeric columns plus every one-hot column from the encoder.
    X = dataset_encoded.select(['Age', 'Tumor_Size_mm'] + encoded_features.tolist())
    y = dataset_encoded.select(['Survival_5_years'])
    # Fixed random_state keeps the 80/20 split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=33)

    # `y` is a one-column frame, so `y_train` is a column vector. Passing that
    # to `fit` makes scikit-learn emit a DataConversionWarning; flatten it to
    # the 1-D shape the estimators expect. The exported `y*` names are left
    # untouched, and the underscore prefix keeps this helper out of the
    # cell's exports.
    _y_train_1d = y_train.to_numpy().ravel()

    logreg = LogisticRegression()
    y_pred_logreg = logreg.fit(X_train, _y_train_1d).predict(X_test)
    bnb = BernoulliNB()
    y_pred_bnb = bnb.fit(X_train, _y_train_1d).predict(X_test)
    dectree = DecisionTreeClassifier()
    y_pred_dectree = dectree.fit(X_train, _y_train_1d).predict(X_test)

    def _report_section(title, y_pred):
        # Build the Markdown lines for one model: heading, accuracy,
        # precision, confusion matrix and classification report, all
        # computed against `y_test`.
        return "\n".join([
            f"# {title}",
            f"Accuracy score: {accuracy_score(y_test, y_pred)}",
            f"Precision score: {precision_score(y_test, y_pred)}",
            "Confusion matrix:",
            "```",
            f"{confusion_matrix(y_test, y_pred)}",
            "```",
            "Classification report:",
            "```",
            f"{classification_report(y_test, y_pred)}",
            "```",
        ])

    _sections = [
        _report_section("Logistic Regression", y_pred_logreg),
        _report_section("Bernoulli Naive Bayes", y_pred_bnb),
        _report_section("Decision Tree Classifier", y_pred_dectree),
    ]
    # Leading/trailing newlines mirror the layout of a triple-quoted block.
    mo.md("\n" + "\n".join(_sections) + "\n")
    return (
        BernoulliNB,
        DecisionTreeClassifier,
        LogisticRegression,
        X,
        X_test,
        X_train,
        accuracy_score,
        bnb,
        classification_report,
        confusion_matrix,
        dectree,
        logreg,
        precision_score,
        train_test_split,
        y,
        y_pred_bnb,
        y_pred_dectree,
        y_pred_logreg,
        y_test,
        y_train,
    )
@app.cell
def _(dataset_cluster, mo):
    # Scatter plot of incidence rate (y) against mortality rate (x), with the
    # points coloured by the 'Cluster' column.
    # NOTE(review): no visible cell defines `dataset_cluster` or produces a
    # 'Cluster' column - confirm where this frame is supposed to come from,
    # otherwise this cell cannot run.
    import altair as alt

    _points = alt.Chart(dataset_cluster).mark_circle()
    _incidence_axis = alt.Y('Incidence_Rate_per_100K')
    _mortality_axis = alt.X('Mortality_Rate_per_100K')
    chart1 = _points.encode(_incidence_axis, _mortality_axis, color='Cluster')
    mo.ui.altair_chart(chart1)
    return alt, chart1
@app.cell
def _():
    # Empty cell: runs no code and exports nothing.
    return
# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()