resume-api / controller /scraper.py
Nattyboi's picture
updated codebase to make it faster
bbd997e
raw
history blame
798 Bytes
def scrapeCourse(url, timeout=30):
    """Scrape the bullet-point list from a course landing page.

    Fetches ``url``, locates the first ``<ul>`` matched by a fixed CSS
    selector, and collects the whitespace-stripped text of every ``<li>``
    found beneath it.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block forever on a stalled
            connection; pass ``None`` to restore that unbounded behavior.

    Returns:
        A list of strings, one per ``<li>``, in document order. ``None``
        when the response status is not 200 or when the selector matches
        nothing (a message is printed in both cases).

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, invalid URL).
    """
    # Kept as function-scope imports so that importing this module does not
    # require the scraping dependencies until the function is actually used.
    import requests
    from bs4 import BeautifulSoup

    # NOTE(review): the `ud-` prefixed classes suggest this targets a Udemy
    # course page -- confirm. The hashed class name
    # (`styles--description--AfVWV`) looks build-generated, so this selector
    # will presumably stop matching when the site is redeployed.
    selector = "#main-content-anchor > div.paid-course-landing-page__body > div > div.ud-text-sm.component-margin.styles--description--AfVWV > div > div > div > div:nth-child(1) > ul"

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    content = soup.select(selector)
    if not content:
        print("No content found for the selector.")
        return None

    # Only the first matching <ul> is used; any further matches are ignored.
    return [item.get_text(strip=True) for item in content[0].find_all('li')]