Update app.py
Browse files
app.py
CHANGED
@@ -27,14 +27,16 @@ class StreamHandler(BaseCallbackHandler):
|
|
27 |
|
28 |
|
29 |
@st.cache_data
def get_page_urls(url, timeout=30):
    """Collect the URLs linked from ``url`` that live underneath it.

    Fetches the page at ``url``, parses it as HTML and keeps every anchor
    whose ``href`` starts with ``url``. The raw ``href`` text is compared,
    so relative links are not resolved and never match.

    Args:
        url: Address of the page to scan; also the prefix a link must
            start with to be kept.
        timeout: Seconds to wait for the server before ``requests``
            raises a timeout error instead of blocking indefinitely.

    Returns:
        A set of URL strings that always contains ``url`` itself.
    """
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')
    # href=True restricts the search to anchors that actually carry an
    # href attribute; indexing link['href'] on a bare <a> raises KeyError.
    links = {
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if anchor['href'].startswith(url)
    }
    # The page itself is always part of the result.
    links.add(url)
    return links
|
36 |
|
37 |
|
|
|
38 |
def get_url_content(url):
|
39 |
response = requests.get(url)
|
40 |
if url.endswith('.pdf'):
|
|
|
27 |
|
28 |
|
29 |
@st.cache_data
def get_page_urls(url, timeout=30):
    """Collect the URLs linked from ``url`` that live underneath it.

    Fetches the page at ``url``, parses it as HTML and keeps every anchor
    whose ``href`` starts with ``url``. The raw ``href`` text is compared,
    so relative links are not resolved and never match.

    Args:
        url: Address of the page to scan; also the prefix a link must
            start with to be kept.
        timeout: Seconds to wait for the server before ``requests``
            raises a timeout error instead of blocking indefinitely.

    Returns:
        A set of URL strings that always contains ``url`` itself.
    """
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')
    # href=True restricts the search to anchors that carry an href
    # attribute, so the lookup below cannot raise KeyError.
    links = {
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if anchor['href'].startswith(url)
    }
    # The page itself is always part of the result.
    links.add(url)
    return links
|
37 |
|
38 |
|
39 |
+
|
40 |
def get_url_content(url):
|
41 |
response = requests.get(url)
|
42 |
if url.endswith('.pdf'):
|