Gopala Krishna committed on
Commit
6e7fba3
·
1 Parent(s): 8c3633d
.vs/UBCFProductRecommendations/FileContentIndex/3d67ad77-3441-41ca-8028-b6802390d8c7.vsidx DELETED
Binary file (9.21 kB)
 
.vs/UBCFProductRecommendations/v17/.wsuo CHANGED
Binary files a/.vs/UBCFProductRecommendations/v17/.wsuo and b/.vs/UBCFProductRecommendations/v17/.wsuo differ
 
app.py CHANGED
@@ -1,31 +1,11 @@
1
 
2
- # Import necessary libraries.
3
  import pandas as pd
4
  from sklearn.metrics.pairwise import cosine_similarity
5
 
6
  # Read data source Excel files.
7
  df1 = pd.read_excel('Online_Retail.xlsx')
8
-
9
- # Check dataframe information.
10
- #df1.info()
11
-
12
- # Read header of dataframe.
13
- #df1.head()
14
-
15
- # Check any column containing the null value.
16
- #df1.isnull().any()
17
-
18
- # Count the number of null value records in the CustomerID column.
19
- #df1['CustomerID'].isna().sum()
20
-
21
  df1a = df1.dropna(subset=['CustomerID'])
22
 
23
- # Check dataframe information.
24
- #df1a.info()
25
-
26
- # Read header of dataframe.
27
- #df1a.head()
28
-
29
  # Create CustomerID vs Item (Purchased Items, by StockCode) matrix by pivot table function.
30
  CustomerID_Item_matrix = df1a.pivot_table(
31
  index='CustomerID',
@@ -34,51 +14,30 @@ CustomerID_Item_matrix = df1a.pivot_table(
34
  aggfunc='sum'
35
  )
36
 
37
- # Display the shape of matrix, 4372 rows of CustomerID, 3684 columns of Item.
38
- #CustomerID_Item_matrix.shape
39
-
40
  # Update illustration of the matrix, 1 to represent customer have purchased item, 0 to represent customer haven't purchased.
41
  CustomerID_Item_matrix = CustomerID_Item_matrix.applymap(lambda x: 1 if x > 0 else 0)
42
 
43
- # Read header of CustomerID vs Item matrix.
44
- #CustomerID_Item_matrix.loc[12680:].head()
45
-
46
  # Create User to User similarity matrix.
47
  user_to_user_similarity_matrix = pd.DataFrame(
48
  cosine_similarity(CustomerID_Item_matrix)
49
  )
50
 
51
- # Display header of User to User similarity matrix.
52
- #user_to_user_similarity_matrix.head()
53
-
54
  # Update index to corresponding CustomerID.
55
  user_to_user_similarity_matrix.columns = CustomerID_Item_matrix.index
56
  user_to_user_similarity_matrix['CustomerID'] = CustomerID_Item_matrix.index
57
  user_to_user_similarity_matrix = user_to_user_similarity_matrix.set_index('CustomerID')
58
 
59
- # Display header of User to User similarity matrix.
60
- #user_to_user_similarity_matrix.head()
61
-
62
- # Randomly pick CustomerID (12702) to display the most similar CustomerID.
63
- # The most similar CustomerID is 14608, which has 51% similarity.
64
- #user_to_user_similarity_matrix.loc[12702.0].sort_values(ascending=False)
65
-
66
  # Display CustomerID (12702) purchased items.
67
  items_purchased_by_X = set(CustomerID_Item_matrix.loc[12702.0].iloc[
68
  CustomerID_Item_matrix.loc[12702.0].to_numpy().nonzero()].index)
69
- #items_purchased_by_X
70
 
71
  # Display CustomerID (14608) purchased items.
72
  items_purchased_by_Y = set(CustomerID_Item_matrix.loc[14608.0].iloc[
73
  CustomerID_Item_matrix.loc[14608.0].to_numpy().nonzero()].index)
74
- #items_purchased_by_Y
75
 
76
  # Find out items which purchased by X (12702) but not yet purchased by Y (14608).
77
  items_to_recommend_to_Y = items_purchased_by_X - items_purchased_by_Y
78
 
79
- # Display the list of items recommended for Y (14608).
80
- #items_to_recommend_to_Y
81
-
82
  # Display the list of items recommended for Y (14608) with item Description.
83
  print(df1a.loc[
84
  df1a['StockCode'].isin(items_to_recommend_to_Y),
 
1
 
 
2
  import pandas as pd
3
  from sklearn.metrics.pairwise import cosine_similarity
4
 
5
  # Read data source Excel files.
6
  df1 = pd.read_excel('Online_Retail.xlsx')
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  df1a = df1.dropna(subset=['CustomerID'])
8
 
 
 
 
 
 
 
9
  # Create CustomerID vs Item (Purchased Items, by StockCode) matrix by pivot table function.
10
  CustomerID_Item_matrix = df1a.pivot_table(
11
  index='CustomerID',
 
14
  aggfunc='sum'
15
  )
16
 
 
 
 
17
  # Update illustration of the matrix, 1 to represent customer have purchased item, 0 to represent customer haven't purchased.
18
  CustomerID_Item_matrix = CustomerID_Item_matrix.applymap(lambda x: 1 if x > 0 else 0)
19
 
 
 
 
20
  # Create User to User similarity matrix.
21
  user_to_user_similarity_matrix = pd.DataFrame(
22
  cosine_similarity(CustomerID_Item_matrix)
23
  )
24
 
 
 
 
25
  # Update index to corresponding CustomerID.
26
  user_to_user_similarity_matrix.columns = CustomerID_Item_matrix.index
27
  user_to_user_similarity_matrix['CustomerID'] = CustomerID_Item_matrix.index
28
  user_to_user_similarity_matrix = user_to_user_similarity_matrix.set_index('CustomerID')
29
 
 
 
 
 
 
 
 
30
  # Display CustomerID (12702) purchased items.
31
  items_purchased_by_X = set(CustomerID_Item_matrix.loc[12702.0].iloc[
32
  CustomerID_Item_matrix.loc[12702.0].to_numpy().nonzero()].index)
 
33
 
34
  # Display CustomerID (14608) purchased items.
35
  items_purchased_by_Y = set(CustomerID_Item_matrix.loc[14608.0].iloc[
36
  CustomerID_Item_matrix.loc[14608.0].to_numpy().nonzero()].index)
 
37
 
38
  # Find out items which purchased by X (12702) but not yet purchased by Y (14608).
39
  items_to_recommend_to_Y = items_purchased_by_X - items_purchased_by_Y
40
 
 
 
 
41
  # Display the list of items recommended for Y (14608) with item Description.
42
  print(df1a.loc[
43
  df1a['StockCode'].isin(items_to_recommend_to_Y),