"""Streamlit page showing model evaluation metrics and data-drift reports."""

import streamlit as st
import pandas as pd
import streamlit.components.v1 as components

# Loaded once at import time so every rerun of the page reuses the same frame.
metrics_path = "assets/model_metrics.csv"
metrics_df = pd.read_csv(metrics_path)

# Explanatory copy rendered by ``metrics_ui``. The literals are assembled with
# implicit concatenation so no source indentation leaks into the rendered text.
_WHAT_IS_DRIFT_TEXT = (
    " Data Drift is a name for data change that can affect the Machine "
    "Learning model performance. There are different types of drift.There "
    "can be change in target distribution. For example you have a model that "
    "predicts house price based on the property description (no.of "
    "rooms,location,etc.,) but there is a change in the market and all "
    "properities prices go up. If you don't detect the drift in the target "
    "distribution and don't update the model with new targets then your "
    "model will predict too low pricies. The chnage in the target "
    # Fixed: the original text named this "target distribution", which is the
    # thing that changes, not the name of the drift.
    "distribution is so-called **target drift.** The next type of drift "
    "is **covariate drift**.it is a change in the input data distribution. "
    "For example, you have a categorical feature that will start to have new "
    "category. "
)

_AFTER_DETECTION_TEXT = (
    " When data drift is detected that ML model should be updated. There are "
    "many ways in which it can be done - all depends on the data. The most "
    "striaght forward way is to use all avaiable data samples to train a new "
    "model. The other approach might be to use just a new data samples to "
    "train the model. There might be also approaches with sample weighting - "
    "giving higher weight for fresh data samples and lower weights for old "
    "samples. it depends on the data. "
)

_HOW_TO_DETECT_TEXT = (
    " The data drift can be detected in different ways. \n"
    "The simplest approach is to use statistical tests that compare the "
    "distribution of the trainig data and live data (production data). If "
    "the differnec between two distribution is significantly then a drift "
    "occured. The most popular test are **two-sample, kolmogorov-Smirnov "
    "test,Chi square test, jensen-shannon divergence, Wasserstein "
    "distance.** The alternative approach might be use Machine Learning "
    "model to monitar the data quality. There can be also hybrid "
    "approaches. "
)


def _render_html_report(title: str, report_path: str) -> None:
    """Show a subheader and embed the HTML file at *report_path* below it.

    Args:
        title: Text for the subheader placed above the embedded report.
        report_path: Path of the HTML report to embed.

    Raises:
        OSError: If the report file cannot be opened (propagated unchanged).
    """
    st.subheader(title)
    # Explicit encoding keeps decoding independent of the platform locale.
    # NOTE(review): assumes the reports were written as UTF-8 - confirm
    # against whatever generates them.
    with open(report_path, "r", encoding="utf-8") as report_file:
        report_html = report_file.read()
    components.html(report_html, scrolling=True, height=700, width=800)


def metrics_ui():
    """Render the metrics table, the data-drift explainer and the reports."""
    # NOTE(review): the image source is an empty string in the original code;
    # supply the intended asset path.
    st.image("", width=500)
    st.title("Model Evaluation Metrics")

    st.subheader("Performance Metrics of Trained Models")
    st.dataframe(metrics_df)

    # Look the winning row up once and read both the name and score from it.
    best_row = metrics_df.loc[metrics_df["F1 Score"].idxmax()]
    best_model_name = best_row["Model"]
    best_f1_score = best_row["F1 Score"]
    st.write(
        f"Best Model: {best_model_name} with an F1 Score of {best_f1_score:.2f}"
    )

    st.header("What is data drift ?")
    st.write(_WHAT_IS_DRIFT_TEXT)

    st.header("what to do after data drift detection ?")
    st.write(_AFTER_DETECTION_TEXT)

    st.header("How to detect data drift ?")
    st.write(_HOW_TO_DETECT_TEXT)

    _render_html_report("Dataset Drift", "frontend/reports/report.html")
    _render_html_report(
        "Decison Tree Model Report", "frontend/reports/model_report_1.html"
    )
    _render_html_report(
        "RandomForest Model Drift", "frontend/reports/model_report_2.html"
    )