Gopala Krishna
.
d53f799
raw
history blame
2.12 kB
import functools

import gradio as gr
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
@functools.lru_cache(maxsize=1)
def _load_item_similarity(path='Online_Retail.xlsx'):
    """Read the retail workbook and build the item-to-item similarity data.

    The result is cached so the workbook is parsed and the similarity
    matrix computed once per process rather than on every request.

    Returns:
        A ``(similarity, descriptions)`` tuple. ``similarity`` is a square
        DataFrame of cosine similarities indexed and labelled by StockCode.
        ``descriptions`` holds the distinct (StockCode, Description) pairs,
        indexed by StockCode.
    """
    purchases = pd.read_excel(path)
    # Rows without a CustomerID cannot be attributed to a customer.
    purchases = purchases.dropna(subset=['CustomerID'])

    # Customer x item matrix of total purchased quantity.
    basket = purchases.pivot_table(
        index='CustomerID',
        columns='StockCode',
        values='Quantity',
        aggfunc='sum',
    )
    # 1 if the customer bought the item (net quantity > 0), otherwise 0.
    # Missing cells are NaN, and NaN > 0 is False, so they also become 0.
    basket = (basket > 0).astype(int)

    similarity = pd.DataFrame(
        cosine_similarity(basket.T),
        index=basket.columns,
        columns=basket.columns,
    )
    descriptions = (
        purchases[['StockCode', 'Description']]
        .drop_duplicates()
        .set_index('StockCode')
    )
    return similarity, descriptions


def _resolve_stock_code(stock_code, known_codes):
    """Return the label in ``known_codes`` matching ``stock_code``, or None.

    Matching is done on the text form of each label so that a string typed
    into the UI (e.g. ``"22632"``) finds a label stored as an integer.
    An exact match is preferred; a case-insensitive match is the fallback.
    """
    wanted = str(stock_code).strip()
    by_text = {str(code).strip(): code for code in known_codes}
    if wanted in by_text:
        return by_text[wanted]
    folded = wanted.casefold()
    for text, code in by_text.items():
        if text.casefold() == folded:
            return code
    return None


def find_similar_items(stock_code):
    """Return the items most similar to ``stock_code`` as a text table.

    Similarity is the cosine similarity between items, based on which
    customers purchased them.

    Args:
        stock_code: The StockCode to look up, as text or as a number.

    Returns:
        A string starting with a newline followed by a table of the five
        highest-ranked StockCodes with their descriptions. The queried item
        is not excluded, so it normally appears in the list. If the input
        is empty or the code is unknown, a short explanatory message is
        returned instead.
    """
    if stock_code is None or not str(stock_code).strip():
        return "\nPlease enter a stock code."

    similarity, descriptions = _load_item_similarity()
    code = _resolve_stock_code(stock_code, similarity.index)
    if code is None:
        return f"\nStock code {str(stock_code).strip()!r} was not found in the data."

    top_similar_items = list(
        similarity.loc[code].sort_values(ascending=False).iloc[:5].index
    )
    # A StockCode may carry several distinct descriptions; all are listed.
    results_df = descriptions.loc[top_similar_items]
    return "\n" + results_df.to_string()
# Build the Gradio UI: one text input for the stock code, one text output
# for the table of similar items.
# NOTE(review): the gr.inputs / gr.outputs namespaces appear to be deprecated
# in newer Gradio releases - confirm against the pinned Gradio version.
stock_code_input = gr.inputs.Textbox(label="Enter Stock Code:")
output_table = gr.outputs.Textbox(label="Similar Items")

demo = gr.Interface(
    fn=find_similar_items,
    inputs=stock_code_input,
    outputs=output_table,
    title="Similar Items Recommendation System",
    description="Enter a stock code to find similar items",
)
# Start the web app as soon as the script runs.
demo.launch()