{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "7395ea86-9a4f-49bf-a256-ad6a2e5998a0", "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Notebook-wide imports: standard library first, then third-party packages.\n", "import math\n", "import os\n", "import re\n", "import time\n", "import warnings\n", "from collections import Counter\n", "from io import BytesIO\n", "\n", "import matplotlib.pyplot as plt\n", "import nltk\n", "import numpy as np\n", "import pandas as pd\n", "import plotly\n", "import plotly.figure_factory as ff\n", "import requests\n", "import seaborn as sns\n", "from bs4 import BeautifulSoup\n", "from matplotlib import gridspec\n", "from nltk.corpus import stopwords\n", "from nltk.tokenize import word_tokenize\n", "from PIL import Image\n", "from plotly.graph_objs import Layout, Scatter\n", "from scipy.sparse import hstack\n", "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n", "from sklearn.metrics import pairwise_distances\n", "from sklearn.metrics.pairwise import cosine_similarity\n", "\n", "# Switch plotly to offline notebook rendering (connected mode).\n", "plotly.offline.init_notebook_mode(connected=True)\n", "# Install a blanket \"ignore\" filter so warnings raised from here on are not shown.\n", "warnings.filterwarnings(\"ignore\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "e259843e-297a-48c2-8345-aaab357ddb53", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | asin | \n", "brand | \n", "color | \n", "medium_image_url | \n", "product_type_name | \n", "title | \n", "formatted_price | \n", "
---|---|---|---|---|---|---|---|
4 | \n", "B004GSI2OS | \n", "FeatherLite | \n", "Onyx Black/ Stone | \n", "https://images-na.ssl-images-amazon.com/images... | \n", "SHIRT | \n", "featherlite ladies long sleeve stain resistant... | \n", "$26.26 | \n", "
6 | \n", "B012YX2ZPI | \n", "HX-Kingdom Fashion T-shirts | \n", "White | \n", "https://images-na.ssl-images-amazon.com/images... | \n", "SHIRT | \n", "womens unique 100 cotton special olympics wor... | \n", "$9.99 | \n", "
15 | \n", "B003BSRPB0 | \n", "FeatherLite | \n", "White | \n", "https://images-na.ssl-images-amazon.com/images... | \n", "SHIRT | \n", "featherlite ladies moisture free mesh sport sh... | \n", "$20.54 | \n", "
27 | \n", "B014ICEJ1Q | \n", "FNC7C | \n", "Purple | \n", "https://images-na.ssl-images-amazon.com/images... | \n", "SHIRT | \n", "supernatural chibis sam dean castiel neck tshi... | \n", "$7.39 | \n", "
46 | \n", "B01NACPBG2 | \n", "Fifth Degree | \n", "Black | \n", "https://images-na.ssl-images-amazon.com/images... | \n", "SHIRT | \n", "fifth degree womens gold foil graphic tees jun... | \n", "$6.95 | \n", "