Spaces:
Running
Running
from joblib import dump, load | |
import pandas as pd | |
from sklearn import metrics | |
from flask import flash | |
import numpy as np | |
import pandas as pd | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.metrics.pairwise import cosine_similarity | |
from sklearn import metrics | |
def data_similarity(df, pt, index, column, value, target=None):
    """Return the decoded ``value`` labels of the pivot row nearest to a reference row.

    The rows of ``pt`` are compared with cosine similarity. The single most
    similar row (the reference row itself is excluded) is looked up in ``df``
    and its ``value`` code is mapped back to the original label with the
    label encoder stored on disk for that column.

    Args:
        df: Frame containing at least the ``index`` and ``value`` columns.
            The ``value`` column is expected to hold label-encoded codes,
            since the result is passed through ``inverse_transform``.
        pt: Pivot table whose row labels are values of ``df[index]``.
        index: Name of the ``df`` column that supplies ``pt``'s row labels.
        column: Name of the ``df`` column spread over ``pt``'s columns.
            Not used by the computation; kept so existing calls keep working.
        value: Name of the ``df`` column to report. It also selects the
            encoder file ``label_encoder_<value>.joblib``.
        target: Row label of ``pt`` to find a neighbour for. Defaults to the
            first row label of ``pt``.
            NOTE(review): the previous code searched the row labels for the
            column name passed as ``index``, which cannot match, so no
            reference row was ever selected. The first-row default is an
            assumption; callers should pass ``target`` explicitly once the
            intended reference row is confirmed.

    Returns:
        A list with one entry per neighbour; each entry is the one-element
        array returned by ``inverse_transform``. Empty when ``pt`` has no
        row other than the reference row.

    Raises:
        IndexError: If ``pt`` has no rows or ``target`` is not one of its
            row labels.
    """
    if target is None:
        if len(pt.index) == 0:
            raise IndexError("pivot table has no rows to compare")
        target = pt.index[0]

    matches = np.where(pt.index == target)[0]
    if matches.size == 0:
        raise IndexError(f"{target!r} is not a row label of the pivot table")
    # Kept separate from `index`, which must stay a column name for the
    # DataFrame look-ups below.
    row_pos = matches[0]

    # A pivot table holds NaN for every index/column pair missing from the
    # data and cosine_similarity rejects NaN, so treat missing cells as 0.
    similarity_scores = cosine_similarity(pt.fillna(0))

    # Exclude the reference row explicitly instead of assuming it sorts first:
    # a row identical to the reference ties with it and could take its place.
    ranked = sorted(
        (
            (pos, score)
            for pos, score in enumerate(similarity_scores[row_pos])
            if pos != row_pos
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    neighbours = ranked[:1]

    codes = []
    for pos, _score in neighbours:
        matching_rows = df[df[index] == pt.index[pos]]
        # All matching rows share the same `index` value, so this keeps one
        # row per neighbour; tolist() yields plain Python scalars.
        codes.extend(matching_rows.drop_duplicates(subset=index)[value].tolist())

    if not codes:
        return []

    # The encoder is read from the path the label-encoding step writes to.
    # joblib files are pickles: only load files this application produced.
    encoder = load(
        f"./AI_In_Diamond_Industry/Label_encoders/label_encoder_{value}.joblib"
    )
    return [encoder.inverse_transform([code]) for code in codes]
def recommendation_generator(df):
    """Run the similarity look-up for each of the five Eng/Mkbl attribute pairs.

    For every attribute a pivot table is built from ``df`` (rows: the ``Eng*``
    column, columns: the ``Mkbl*`` column, cells: the matching ``Change_*``
    column, aggregated with pandas' default) and handed to
    ``data_similarity``.

    Args:
        df: Frame containing the ``Eng*``, ``Mkbl*`` and ``Change_*_value``
            columns listed in ``attribute_specs`` below.

    Returns:
        A 5-tuple ``(cts, shp, qua, col, cut)`` of ``data_similarity``
        results, or ``None`` if anything fails (the error is reported to the
        user through ``flash``).
    """
    # (row-label column, pivoted column, value column), in return order.
    attribute_specs = (
        ('EngCts', 'MkblCts', 'Change_cts_value'),
        # The value column is spelled with a capital "C" everywhere else; the
        # lower-case spelling used here previously raised KeyError.
        ('EngShp', 'MkblShp', 'Change_shape_value'),
        ('EngQua', 'MkblQua', 'Change_quality_value'),
        ('EngCol', 'MkblCol', 'Change_color_value'),
        ('EngCut', 'MkblCut', 'Change_cut_value'),
    )
    try:
        # Build every pivot first so a missing column is reported before any
        # similarity search runs.
        pivots = [
            df.pivot_table(index=index_col, columns=pivot_col, values=value_col)
            for index_col, pivot_col, value_col in attribute_specs
        ]
        return tuple(
            data_similarity(df, pivot, index_col, pivot_col, value_col)
            for pivot, (index_col, pivot_col, value_col) in zip(pivots, attribute_specs)
        )
    except Exception as e:
        # UI boundary: surface the failure to the user instead of raising.
        flash(f'Error generating recommendation: {e}', 'error')
        return None
def _describe_cts_change(diff):
    """Label a signed Cts difference as a negative/positive change or 'no change'."""
    if diff < 0:
        return str(diff) + ' negative change'
    if diff > 0:
        return str(diff) + ' positive change'
    # Reached for 0 and for NaN (both comparisons above are False).
    return 'no change'


def _describe_change(row, eng_col, mkbl_col, noun):
    """Label one row as '<eng> to <mkbl> <noun> change' or '<noun> not change'."""
    if row[eng_col] != row[mkbl_col]:
        return str(row[eng_col]) + ' to ' + str(row[mkbl_col]) + ' ' + noun + ' change'
    return noun + ' not change'


def classification_report(df):
    """Derive the change columns, label-encode the data and persist the encoders.

    Steps:
      1. Select the ``Eng*`` / ``Mkbl*`` feature columns from ``df``.
      2. Add ``Cts_diff_eng_mkbl`` (``EngCts - MkblCts`` rounded to 2 places)
         and the textual ``Change_*_value`` columns.
      3. Label-encode every object-dtype column, saving one fitted encoder
         per column under ``./AI_In_Diamond_Industry/Label_encoders/``.
      4. Run ``recommendation_generator`` on the encoded frame.

    Args:
        df: Frame containing all columns named in ``feature_columns`` below.

    Returns:
        The label-encoded copy of the feature frame, or ``None`` if anything
        fails (the error is reported to the user through ``flash``).
    """
    feature_columns = [
        "EngGraphCts", "EngCts", "EngShp", "EngQua", "EngCol", "EngCut",
        "EngPol", "EngSym", "EngFlo", "EngNts", "EngMikly", "EngLab", "EngAmt",
        "MkblCts", "MkblShp", "MkblQua", "MkblCol", "MkblCut", "MkblPol",
        "MkblSym", "MkblFlo", "MkblNts", "MkblMikly", "MkblLab", "MkblAmt",
    ]
    # (noun used in the label / column name, Eng column, Mkbl column)
    categorical_pairs = (
        ('shape', 'EngShp', 'MkblShp'),
        ('quality', 'EngQua', 'MkblQua'),
        ('color', 'EngCol', 'MkblCol'),
        ('cut', 'EngCut', 'MkblCut'),
    )
    try:
        # Copy so the derived columns land on an independent frame rather
        # than on a slice of the caller's `df`.
        features = df[feature_columns].copy()

        # ------------------------------------------------------------------
        # Feature engineering: derived change columns
        # ------------------------------------------------------------------
        features["Cts_diff_eng_mkbl"] = (features["EngCts"] - features["MkblCts"]).round(2)
        features['Change_cts_value'] = features['Cts_diff_eng_mkbl'].apply(_describe_cts_change)
        for noun, eng_col, mkbl_col in categorical_pairs:
            features[f'Change_{noun}_value'] = features.apply(
                _describe_change, axis=1, args=(eng_col, mkbl_col, noun)
            )

        # ------------------------------------------------------------------
        # Label encoding; one encoder per column is stored on disk
        # ------------------------------------------------------------------
        is_object = features.dtypes == "object"
        object_cols = list(is_object[is_object].index)
        print("Categorical variables:")
        print(object_cols)

        # Encode a copy so `features` keeps the readable values.
        label_data = features.copy()
        for col in object_cols:
            # A fresh encoder per column keeps each saved file independent of
            # whatever was fitted before it.
            encoder = LabelEncoder()
            label_data[col] = encoder.fit_transform(label_data[col])
            dump(encoder, f"./AI_In_Diamond_Industry/Label_encoders/label_encoder_{col}.joblib")

        # ------------------------------------------------------------------
        # Recommendation system
        # ------------------------------------------------------------------
        # NOTE(review): the original passed the un-encoded frame here. The
        # recommender pivots the Change_* columns with pandas' default mean
        # aggregation and maps its results back through inverse_transform,
        # both of which need the encoded codes - confirm this is the intent.
        # The original assignment target was missing (a syntax error). The
        # result is not part of this function's return value, so the call is
        # kept only for its error reporting via flash.
        recommendation_generator(label_data.copy())
        return label_data
    except Exception as e:
        # UI boundary: surface the failure to the user instead of raising.
        flash(f'Error generating classification report: {e}', 'error')
        return None