"""Flask application: upload a CSV/Excel sheet, run the loaded models on it,
and serve the resulting prediction / classification reports."""

import os
import pickle
import uuid
from pathlib import Path
from time import time

import numpy as np
import pandas as pd
from flask import Flask, render_template, request, redirect, url_for, send_file
from huggingface_hub import hf_hub_download
from joblib import load, dump
from sklearn.preprocessing import LabelEncoder
from werkzeug.utils import secure_filename

app = Flask(__name__)

# Secret key for session management.
# A key generated with os.urandom changes on every process start, which
# invalidates existing sessions after a restart and differs between workers.
# Prefer a stable key from the environment; fall back to a random one so the
# app still starts when FLASK_SECRET_KEY is not configured.
app.secret_key = os.environ.get("FLASK_SECRET_KEY") or os.urandom(24)

# Configurations
UPLOAD_FOLDER = "uploads/"
DATA_FOLDER = "data/"
MODEL_FOLDER = "models/"

# Directories holding the joblib classification models and the label encoders.
MODEL_DIR = r'./Model'
LABEL_ENCODER_DIR = r'./Label_encoders'

# Paths of the most recently written report files; set by the /predict route.
# They stay None until the first successful prediction.
PRED_OUTPUT_FILE = None
CLASS_OUTPUT_FILE = None

ALLOWED_EXTENSIONS = {'csv', 'xlsx'}

# Register the folders on the app config and create them if they do not exist.
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['DATA_FOLDER'] = DATA_FOLDER
app.config['MODEL_FOLDER'] = MODEL_FOLDER
for _folder_key in ('UPLOAD_FOLDER', 'DATA_FOLDER', 'MODEL_FOLDER'):
    os.makedirs(app.config[_folder_key], exist_ok=True)

# ------------------------------
# Load Models and Label Encoders
# ------------------------------

# Prediction analysis models loaded from Hugging Face.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# These pickles are downloaded from a remote repository, so this is only safe
# as long as that repository (and the download channel) is trusted.
HF_REPO_ID = "WebashalarForML/Diamond_model_"


def _load_hf_pickle(filename):
    """Download *filename* from HF_REPO_ID and return the unpickled object.

    The file is fetched with ``hf_hub_download`` using MODEL_FOLDER as the
    cache directory, then opened in binary mode and passed to ``pickle.load``.
    """
    local_path = hf_hub_download(
        repo_id=HF_REPO_ID,
        filename=filename,
        cache_dir=MODEL_FOLDER,
    )
    with open(local_path, "rb") as f:
        return pickle.load(f)


makable_model = _load_hf_pickle(
    "models_list/mkble/StackingRegressor_best_pipeline_mkble_0_to_1.01.pkl"
)
grade_model = _load_hf_pickle(
    "models_list/grd/StackingRegressor_best_pipeline_grd_0_to_1.01.pkl"
)
bygrade_model = _load_hf_pickle(
    "models_list/bygrad/StackingRegressor_best_pipeline_bygrad_0_to_1.01.pkl"
)
gia_model = _load_hf_pickle(
    "models_list/gia/StackingRegressor_best_pipeline_gia_0_to_1.01.pkl"
)

# Classification models loaded using joblib.
def _load_classifier(tag):
    """Load ``classification_LogisticRegression_<tag>.joblib`` from MODEL_DIR."""
    model_file = f'classification_LogisticRegression_{tag}.joblib'
    return load(os.path.join(MODEL_DIR, model_file))


# Single-attribute classifiers.
col_model = _load_classifier('col')
cts_model = _load_classifier('cts')
cut_model = _load_classifier('cut')
qua_model = _load_classifier('qua')
shp_model = _load_classifier('shp')

# Blk / Wht / Open / Pav classifiers for the "mkbl" target.
blk_eng_to_mkbl_model = _load_classifier('mkbl_blk')
wht_eng_to_mkbl_model = _load_classifier('mkbl_wht')
open_eng_to_mkbl_model = _load_classifier('mkbl_open')
pav_eng_to_mkbl_model = _load_classifier('mkbl_pav')

# Blk / Wht / Open / Pav classifiers for the "grade" target.
blk_eng_to_grade_model = _load_classifier('grade_blk')
wht_eng_to_grade_model = _load_classifier('grade_wht')
open_eng_to_grade_model = _load_classifier('grade_open')
pav_eng_to_grade_model = _load_classifier('grade_pav')

# Blk / Wht / Open / Pav classifiers for the "bygrade" target.
blk_eng_to_bygrade_model = _load_classifier('bygrade_blk')
wht_eng_to_bygrade_model = _load_classifier('bygrade_wht')
open_eng_to_bygrade_model = _load_classifier('bygrade_open')
pav_eng_to_bygrade_model = _load_classifier('bygrade_pav')

# First two classifiers for the "gia" target.
blk_eng_to_gia_model = _load_classifier('gia_blk')
wht_eng_to_gia_model = _load_classifier('gia_wht')
open_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_gia_open.joblib'))
pav_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_gia_pav.joblib'))

# List of label encoder names.
encoder_list = [
    'Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo',
    'EngNts', 'EngMikly', 'EngLab', 'EngBlk', 'EngWht', 'EngOpen', 'EngPav',
    'Change_cts_value', 'Change_shape_value', 'Change_quality_value',
    'Change_color_value', 'Change_cut_value',
    'Change_Blk_Eng_to_Mkbl_value', 'Change_Wht_Eng_to_Mkbl_value',
    'Change_Open_Eng_to_Mkbl_value', 'Change_Pav_Eng_to_Mkbl_value',
    'Change_Blk_Eng_to_Grd_value', 'Change_Wht_Eng_to_Grd_value',
    'Change_Open_Eng_to_Grd_value', 'Change_Pav_Eng_to_Grd_value',
    'Change_Blk_Eng_to_ByGrd_value', 'Change_Wht_Eng_to_ByGrd_value',
    'Change_Open_Eng_to_ByGrd_value', 'Change_Pav_Eng_to_ByGrd_value',
    'Change_Blk_Eng_to_Gia_value', 'Change_Wht_Eng_to_Gia_value',
    'Change_Open_Eng_to_Gia_value', 'Change_Pav_Eng_to_Gia_value',
]

# Load label encoders using pathlib for cleaner path management.
enc_path = Path(LABEL_ENCODER_DIR)
loaded_label_encoder = {
    val: load(enc_path / f"label_encoder_{val}.joblib") for val in encoder_list
}

# Columns that are label-encoded before being fed to the models.
CATEGORICAL_COLUMNS = [
    'Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut',
    'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly',
]
# Additional label-encoded columns used only by the second classifier group.
EXTRA_CATEGORICAL_COLUMNS = ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']
# Feature columns for the regressors and the first classifier group.
PREDICTION_COLUMNS = [
    'Tag', 'EngCts', 'EngShp', 'EngQua', 'EngCol', 'EngCut',
    'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly', 'EngAmt',
]
# Feature columns for the second classifier group.
CLASSIFICATION_COLUMNS = PREDICTION_COLUMNS + EXTRA_CATEGORICAL_COLUMNS

# (output column prefix, regressor) in the order the columns are emitted.
REGRESSORS = (
    ('GIA', gia_model),
    ('Grade', grade_model),
    ('ByGrade', bygrade_model),
    ('Makable', makable_model),
)

# (output column, classifier, label-encoder key); fed PREDICTION_COLUMNS.
BASE_CLASSIFIERS = (
    ('col_change', col_model, 'Change_color_value'),
    ('cts_change', cts_model, 'Change_cts_value'),
    ('cut_change', cut_model, 'Change_cut_value'),
    ('qua_change', qua_model, 'Change_quality_value'),
    ('shp_change', shp_model, 'Change_shape_value'),
)

# (output column == label-encoder key, classifier); fed CLASSIFICATION_COLUMNS.
EXTENDED_CLASSIFIERS = (
    ('Change_Blk_Eng_to_Mkbl_value', blk_eng_to_mkbl_model),
    ('Change_Wht_Eng_to_Mkbl_value', wht_eng_to_mkbl_model),
    ('Change_Open_Eng_to_Mkbl_value', open_eng_to_mkbl_model),
    ('Change_Pav_Eng_to_Mkbl_value', pav_eng_to_mkbl_model),
    ('Change_Blk_Eng_to_Grd_value', blk_eng_to_grade_model),
    ('Change_Wht_Eng_to_Grd_value', wht_eng_to_grade_model),
    ('Change_Open_Eng_to_Grd_value', open_eng_to_grade_model),
    ('Change_Pav_Eng_to_Grd_value', pav_eng_to_grade_model),
    ('Change_Blk_Eng_to_ByGrd_value', blk_eng_to_bygrade_model),
    ('Change_Wht_Eng_to_ByGrd_value', wht_eng_to_bygrade_model),
    ('Change_Open_Eng_to_ByGrd_value', open_eng_to_bygrade_model),
    ('Change_Pav_Eng_to_ByGrd_value', pav_eng_to_bygrade_model),
    ('Change_Blk_Eng_to_Gia_value', blk_eng_to_gia_model),
    ('Change_Wht_Eng_to_Gia_value', wht_eng_to_gia_model),
    ('Change_Open_Eng_to_Gia_value', open_eng_to_gia_model),
    ('Change_Pav_Eng_to_Gia_value', pav_eng_to_gia_model),
)


# ------------------------------
# Utility: Allowed File Check
# ------------------------------
def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The comparison is case-insensitive; a name without a dot is rejected.
    """
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def _report_is_available(path):
    """Return True if *path* is set and refers to an existing file."""
    return path is not None and os.path.isfile(path)


# ------------------------------
# Routes
# ------------------------------
@app.route('/')
def index():
    """Render the upload page."""
    return render_template('index.html')


@app.route('/predict', methods=['POST'])
def predict():
    """Handle an uploaded sheet, run the models and redirect to the report.

    On any validation or processing failure the error is logged and the
    client is redirected back to the index page. On success the prediction
    and classification tables are written as CSV files and their paths are
    stored in PRED_OUTPUT_FILE / CLASS_OUTPUT_FILE.
    """
    global PRED_OUTPUT_FILE, CLASS_OUTPUT_FILE

    file = request.files.get('file')
    if file is None:
        app.logger.error('No file part')
        return redirect(url_for('index'))

    if file.filename == '':
        app.logger.error('No selected file')
        return redirect(url_for('index'))

    if not allowed_file(file.filename):
        app.logger.error('Invalid file type. Only CSV and Excel files are allowed.')
        return redirect(url_for('index'))

    # Take the extension from the original name: secure_filename may strip
    # characters (even the dot), and the check must be case-insensitive to
    # agree with allowed_file.
    extension = file.filename.rsplit('.', 1)[1].lower()

    # The unique id keeps concurrent uploads with the same name from
    # overwriting each other and is reused for the output file names.
    unique_id = uuid.uuid4().hex[:8]
    stored_name = f"{unique_id}_{secure_filename(file.filename)}"
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], stored_name)
    file.save(filepath)

    # Convert file to DataFrame.
    try:
        if extension == 'csv':
            df = pd.read_csv(filepath)
        else:
            df = pd.read_excel(filepath)
    except Exception as e:  # boundary: any parser failure is reported, not raised
        app.logger.error('Error reading file: %s', e)
        return redirect(url_for('index'))

    # Process the DataFrame and generate predictions and classification analysis.
    df_pred, dx_class = process_dataframe(df)
    if df_pred.empty:
        app.logger.error(
            'Processed prediction DataFrame is empty. '
            'Check the input file and processing logic.'
        )
        return redirect(url_for('index'))

    # Save output files with the current date and the unique id.
    current_date = pd.Timestamp.now().strftime("%Y-%m-%d")
    data_dir = app.config['DATA_FOLDER']
    PRED_OUTPUT_FILE = os.path.join(
        data_dir, f'prediction_output_{current_date}_{unique_id}.csv')
    CLASS_OUTPUT_FILE = os.path.join(
        data_dir, f'classification_output_{current_date}_{unique_id}.csv')
    df_pred.to_csv(PRED_OUTPUT_FILE, index=False)
    dx_class.to_csv(CLASS_OUTPUT_FILE, index=False)

    # Redirect to report view; default to prediction report, page 1.
    return redirect(url_for('report_view', report_type='pred', page=1))


def process_dataframe(df):
    """Run all regressors and classifiers on *df*.

    Args:
        df: DataFrame containing at least CLASSIFICATION_COLUMNS.

    Returns:
        A ``(df_pred, dx)`` tuple. ``df_pred`` holds the encoded
        PREDICTION_COLUMNS plus ``<name>_Predicted`` and ``<name>_Diff``
        (``EngAmt`` minus the prediction) for each regressor. ``dx`` is
        ``df_pred`` plus one decoded label column per classifier.
        Two empty DataFrames are returned if a column is missing, a value is
        unknown to its label encoder, or any other error occurs; the cause
        is logged.
    """
    empty_result = (pd.DataFrame(), pd.DataFrame())
    try:
        missing = [c for c in CLASSIFICATION_COLUMNS if c not in df.columns]
        if missing:
            app.logger.error('Missing required columns: %s', ', '.join(missing))
            return empty_result

        # Two feature frames: one for prediction and one for classification.
        df_pred = df[PREDICTION_COLUMNS].copy()
        # NOTE(review): fillna("NA") also touches the numeric EngCts/EngAmt
        # columns, so a missing numeric value makes astype(float) below fail
        # and the whole upload is rejected - confirm this is intended.
        df_class = df[CLASSIFICATION_COLUMNS].fillna("NA").copy()

        # Label-encode the shared categorical columns and mirror them into
        # the classification frame.
        for col in CATEGORICAL_COLUMNS:
            try:
                df_pred[col] = loaded_label_encoder[col].transform(df_pred[col])
            except ValueError as e:
                app.logger.error('Invalid value in column %s: %s', col, e)
                return empty_result
            df_class[col] = df_pred[col]

        # Label-encode the extra columns of the classification frame.
        for col in EXTRA_CATEGORICAL_COLUMNS:
            try:
                df_class[col] = loaded_label_encoder[col].transform(df_class[col])
            except ValueError as e:
                app.logger.error('Invalid value in column %s: %s', col, e)
                return empty_result

        df_pred = df_pred.astype(float)
        df_class = df_class.astype(float)

        # -------------------------
        # Prediction Report Section
        # -------------------------
        # Snapshot the features before prediction columns are appended.
        x = df_pred.copy()
        for name, model in REGRESSORS:
            df_pred[f'{name}_Predicted'] = model.predict(x)
        for name, _ in REGRESSORS:
            df_pred[f'{name}_Diff'] = df_pred['EngAmt'] - df_pred[f'{name}_Predicted']

        # -------------------------
        # Classification Report Section
        # -------------------------
        x2 = df_class.copy()
        dx = df_pred.copy()  # Start with the prediction data.
        for out_col, model, encoder_key in BASE_CLASSIFIERS:
            encoder = loaded_label_encoder[encoder_key]
            dx[out_col] = encoder.inverse_transform(model.predict(x))
        for out_col, model in EXTENDED_CLASSIFIERS:
            encoder = loaded_label_encoder[out_col]
            dx[out_col] = encoder.inverse_transform(model.predict(x2))

        # Full data is returned; pagination happens in the report view.
        return df_pred, dx
    except Exception:  # boundary: report any processing failure as "empty"
        app.logger.exception('Error processing file')
        return empty_result


# ------------------------------
# Report View Route with Pagination & Toggle
# ------------------------------
@app.route('/report')
def report_view():
    """Render one page of the prediction or classification report.

    Query parameters: ``report_type`` ('pred' selects the prediction report,
    anything else the classification report) and ``page`` (1-based; invalid
    or non-positive values fall back to 1). Redirects to the index page when
    no report file is available yet.
    """
    report_type = request.args.get('report_type', 'pred')
    try:
        page = int(request.args.get('page', 1))
    except ValueError:
        page = 1
    page = max(page, 1)  # a page below 1 would produce a negative slice start
    per_page = 15  # records per page

    # Pick the appropriate CSV file.
    report_path = PRED_OUTPUT_FILE if report_type == 'pred' else CLASS_OUTPUT_FILE
    if not _report_is_available(report_path):
        app.logger.error('No report available; run a prediction first.')
        return redirect(url_for('index'))

    df = pd.read_csv(report_path)

    start_idx = (page - 1) * per_page
    end_idx = start_idx + per_page
    total_records = len(df)

    df_page = df.iloc[start_idx:end_idx]
    table_html = df_page.to_html(classes="data-table", index=False)

    has_prev = page > 1
    has_next = end_idx < total_records

    return render_template('output.html',
                           table_html=table_html,
                           report_type=report_type,
                           page=page,
                           has_prev=has_prev,
                           has_next=has_next)


# ------------------------------
# Download Routes
# ------------------------------
def _send_report(path):
    """Send the report at *path* as an attachment, or redirect to the index.

    The path is made absolute before being handed to send_file so that it
    resolves against the working directory, where to_csv wrote the file.
    """
    if not _report_is_available(path):
        app.logger.error('No report available for download; run a prediction first.')
        return redirect(url_for('index'))
    return send_file(os.path.abspath(path), as_attachment=True)


@app.route('/download_pred', methods=['GET'])
def download_pred():
    """Download the most recent prediction report."""
    return _send_report(PRED_OUTPUT_FILE)


@app.route('/download_class', methods=['GET'])
def download_class():
    """Download the most recent classification report."""
    return _send_report(CLASS_OUTPUT_FILE)


if __name__ == "__main__":
    # Debug mode exposes the interactive debugger, which allows code
    # execution; enable it only explicitly via FLASK_DEBUG.
    app.run(debug=os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true"))