# Spaces: Sleeping -- stray page-status text, kept only as a comment so it is not read as code.
from bs4 import BeautifulSoup
import pandas as pd

# Extract candidate names from a saved HTML page and write them, together
# with the name of the endorsing organisation, to a CSV file.

# Open and read the HTML file
with open("giffords.html", "r", encoding="utf-8") as file:
    html_content = file.read()

# Parse the HTML content
soup = BeautifulSoup(html_content, "html.parser")

# Find all card-title headings; the name is read from the <a> inside each one.
h3 = soup.find_all("h3", class_="gif-card__title")

# Initialize a list to store the data
data = []
for heading in h3:
    anchor = heading.find("a")
    if anchor is None:
        # find() returns None when the heading has no link; there is no name
        # to read in that case, so skip it instead of failing on `.text`.
        continue
    data.append(anchor.text.strip())

# Make dataframe. The column is named explicitly: a frame built from a bare
# list labels its only column 0, so the "Candidate" lookups below would
# raise KeyError.
df = pd.DataFrame(data, columns=["Candidate"])
# NOTE(review): the input file is "giffords.html" but the label and the output
# file name refer to education endorsements -- confirm these are intended.
df["Endorsed by"] = "Education Votes"

# Clean candidate name. regex=False keeps both patterns literal: as regular
# expressions "(D)" would be a capture group and "." would match any character.
df["Candidate"] = df["Candidate"].str.replace("\xa0(D)", "", regex=False)
df["Candidate"] = df["Candidate"].str.replace("Rep. ", "", regex=False)

# write to csv
df.to_csv("edu_endorsements.csv", index=False)