Mattral committed on
Commit
46c5199
·
verified ·
1 Parent(s): e6beffa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -27,14 +27,16 @@ class StreamHandler(BaseCallbackHandler):
27
 
28
 
29
  @st.cache_data
 
30
  # Removed side of the diff: get_page_urls as it stood before this commit.
  # It fetches `url`, parses the response as HTML, keeps every anchor href
  # that starts with `url` (other than `url` itself), then adds `url` back
  # and returns the result as a set.
  def get_page_urls(url):
31
  page = requests.get(url)
32
  soup = BeautifulSoup(page.content, 'html.parser')
33
  # NOTE(review): link['href'] is evaluated for every <a> tag without first
  # checking that the attribute exists; presumably this fails on anchors that
  # have no href, which is what the updated version of this line guards against.
- links = [link['href'] for link in soup.find_all('a') if link['href'].startswith(url) and link['href'] not in [url]]
34
  links.append(url)
35
  return set(links)
36
 
37
 
 
38
  def get_url_content(url):
39
  response = requests.get(url)
40
  if url.endswith('.pdf'):
 
27
 
28
 
29
  @st.cache_data
30
+
31
  def get_page_urls(url):
32
  page = requests.get(url)
33
  soup = BeautifulSoup(page.content, 'html.parser')
34
+ links = [link['href'] for link in soup.find_all('a') if 'href' in link.attrs and link['href'].startswith(url) and link['href'] not in [url]]
35
  links.append(url)
36
  return set(links)
37
 
38
 
39
+
40
  def get_url_content(url):
41
  response = requests.get(url)
42
  if url.endswith('.pdf'):