import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load the raw transaction table from the Excel workbook.
df1 = pd.read_excel('Online_Retail.xlsx')

# Keep only the rows that carry a customer identifier; rows with a missing
# CustomerID cannot be attributed to any user in the steps below.
df1a = df1[df1['CustomerID'].notna()]

# Customer x item table: one row per CustomerID, one column per StockCode,
# each cell holding the summed Quantity for that pair (NaN where the pair
# never occurs in df1a).
CustomerID_Item_matrix = pd.pivot_table(
    df1a,
    values='Quantity',
    index='CustomerID',
    columns='StockCode',
    aggfunc='sum',
)

# Collapse summed quantities into a 0/1 purchase indicator.
# A cell becomes 1 only when its quantity is strictly positive; zero,
# negative and missing (NaN) cells all become 0, because `NaN > 0` is False.
# The vectorized comparison replaces the element-wise `applymap` call, which
# is deprecated in recent pandas releases and runs a Python lambda per cell.
CustomerID_Item_matrix = (CustomerID_Item_matrix > 0).astype(int)

# Pairwise cosine similarity between the rows (customers) of the purchase
# matrix. The result is square, and both axes are labelled with the matrix's
# row index so that entry [a, b] is the similarity of customers a and b.
# The labels are passed straight to the constructor; this avoids adding a
# temporary string-named 'CustomerID' column next to the numeric customer-ID
# column labels and then moving it into the index.
user_to_user_similarity_matrix = pd.DataFrame(
    cosine_similarity(CustomerID_Item_matrix),
    index=CustomerID_Item_matrix.index,
    columns=CustomerID_Item_matrix.index,
)

# Item labels whose entry in customer 12702.0's row is non-zero.
# The row is looked up once and filtered with a boolean mask.
_row_of_X = CustomerID_Item_matrix.loc[12702.0]
items_purchased_by_X = set(_row_of_X[_row_of_X != 0].index)

# Item labels whose entry in customer 14608.0's row is non-zero.
# The row is looked up once and filtered with a boolean mask.
_row_of_Y = CustomerID_Item_matrix.loc[14608.0]
items_purchased_by_Y = set(_row_of_Y[_row_of_Y != 0].index)

# Candidate recommendations for Y: everything in X's item set that is not
# already in Y's item set.
items_to_recommend_to_Y = items_purchased_by_X.difference(items_purchased_by_Y)

# Show the recommended items with their descriptions: select the transaction
# rows whose StockCode is in the recommendation set, keep the two display
# columns, remove repeated (StockCode, Description) pairs and index the
# result by StockCode.
_is_recommended = df1a['StockCode'].isin(items_to_recommend_to_Y)
_recommended_items = df1a.loc[_is_recommended, ['StockCode', 'Description']]
print(_recommended_items.drop_duplicates().set_index('StockCode'))