"""User-based collaborative-filtering example on the Online Retail data set.

Reads ``Online_Retail.xlsx``, builds a binary customer-by-item purchase
matrix, computes customer-to-customer cosine similarity, and prints the items
bought by one customer (X) that another customer (Y) has not bought yet.
"""

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Customers compared below. The IDs are floats because that is how the
# original script addresses the pivot-table index (``.loc[12702.0]``).
CUSTOMER_X = 12702.0
CUSTOMER_Y = 14608.0

# Read the source Excel file and drop rows that have no CustomerID.
df1 = pd.read_excel('Online_Retail.xlsx')
df1a = df1.dropna(subset=['CustomerID'])

# Build the CustomerID x StockCode matrix; each cell holds the summed
# Quantity for that pair (NaN where the pair never occurs).
CustomerID_Item_matrix = df1a.pivot_table(
    index='CustomerID',
    columns='StockCode',
    values='Quantity',
    aggfunc='sum',
)

# Binarise the matrix: 1 means the customer has a positive total quantity
# for the item, 0 otherwise. NaN compares False against 0, so missing pairs
# become 0. The vectorised comparison replaces the deprecated, element-wise
# ``DataFrame.applymap``.
CustomerID_Item_matrix = CustomerID_Item_matrix.gt(0).astype(int)

# Customer-to-customer cosine similarity, labelled by CustomerID on both
# axes. It is not used by the recommendation step below.
user_to_user_similarity_matrix = pd.DataFrame(
    cosine_similarity(CustomerID_Item_matrix),
    index=CustomerID_Item_matrix.index,
    columns=CustomerID_Item_matrix.index,
)

# Items purchased by customer X.
purchases_of_X = CustomerID_Item_matrix.loc[CUSTOMER_X]
items_purchased_by_X = set(purchases_of_X[purchases_of_X > 0].index)

# Items purchased by customer Y.
purchases_of_Y = CustomerID_Item_matrix.loc[CUSTOMER_Y]
items_purchased_by_Y = set(purchases_of_Y[purchases_of_Y > 0].index)

# Items purchased by X but not yet purchased by Y.
items_to_recommend_to_Y = items_purchased_by_X - items_purchased_by_Y

# Print the items recommended for Y together with their descriptions.
print(
    df1a.loc[
        df1a['StockCode'].isin(items_to_recommend_to_Y),
        ['StockCode', 'Description'],
    ].drop_duplicates().set_index('StockCode')
)