"""Extract candidate names from a saved HTML page and write them to a CSV.

Reads ``giffords.html``, takes the anchor text inside every
``<h3 class="gif-card__title">`` element, tags each name with an endorser
label, strips the party suffix and the ``Rep. `` title, and writes the result
to ``edu_endorsements.csv``.
"""
from bs4 import BeautifulSoup
import pandas as pd

# Open and read the HTML file
with open("giffords.html", 'r', encoding='utf-8') as file:
    html_content = file.read()

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Find all <h3> card-title elements
h3 = soup.find_all('h3', class_='gif-card__title')

# Collect the stripped link text of each card title
data = [link.find('a').text.strip() for link in h3]

# Make dataframe. The column must be named explicitly: without `columns=`
# the single column is labelled 0 and the "Candidate" lookups below fail
# with a KeyError.
df = pd.DataFrame(data, columns=["Candidate"])
# NOTE(review): the input file is giffords.html but the label and the output
# filename refer to "Education Votes" / "edu" -- confirm these are intended.
df["Endorsed by"] = "Education Votes"

# Clean candidate name. regex=False forces literal matching on every pandas
# version; as regular expressions, "(D)" would be a capture group and the "."
# in "Rep. " would match any character.
df["Candidate"] = df["Candidate"].str.replace('\xa0(D)', '', regex=False)
df["Candidate"] = df["Candidate"].str.replace('Rep. ', '', regex=False)

# write to csv
df.to_csv("edu_endorsements.csv", index=False)