Mattral committed on
Commit
c7a122e
·
verified ·
1 Parent(s): 3c67bbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -28,11 +28,15 @@ class StreamHandler(BaseCallbackHandler):
28
 
29
  @st.cache_data
30
  def get_page_urls(url):
31
- page = requests.get(url)
32
- soup = BeautifulSoup(page.content, 'html.parser')
33
- links = [link['href'] for link in soup.find_all('a') if link['href'].startswith(url) and link['href'] not in [url]]
34
- links.append(url)
35
- return set(links)
 
 
 
 
36
 
37
 
38
  def get_url_content(url):
 
28
 
29
  @st.cache_data
30
  def get_page_urls(url):
31
+ try:
32
+ page = requests.get(url)
33
+ soup = BeautifulSoup(page.content, 'html.parser')
34
+ links = [link['href'] for link in soup.find_all('a') if 'href' in link.attrs and link['href'].startswith(url) and link['href'] not in [url]]
35
+ links.append(url)
36
+ return set(links)
37
+ except requests.RequestException as e:
38
+ st.error(f"Failed to load page: {e}")
39
+ return set()
40
 
41
 
42
  def get_url_content(url):