Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -368,30 +368,6 @@ def fetch_articles_from_page(url):
|
|
368 |
articles = soup.find_all('div', class_='entry-post gt-box-shadow-2')
|
369 |
return articles, soup
|
370 |
|
371 |
-
def extract_articles(articles, timeout=30):
    """Collect title, date, link and body text for each listed article.

    For every node in ``articles`` this reads the ``h2.entry-title`` text, the
    ``div.entry-date gt-meta`` text and the ``href`` of the first ``<a>``, then
    downloads the linked page and extracts the text of its
    ``div.entry-content`` element.

    Args:
        articles: Iterable of parsed nodes exposing a BeautifulSoup-style
            ``find(name, class_=...)`` method.
        timeout: Seconds to wait for each article page before giving up.

    Returns:
        A list of dicts with the keys ``'title'``, ``'date'``, ``'link'`` and
        ``'content'``, one per input node, in input order. Missing pieces are
        filled with ``"No Title"``, ``"No Date"``, ``"No Link"`` and
        ``"No content found"`` respectively.

    Raises:
        requests.RequestException: If an article page cannot be fetched or
            answers with an HTTP error status.
    """
    # Local import keeps the block self-contained without touching the
    # file-level import list.
    from urllib.parse import urljoin

    article_data = []
    for article in articles:
        title_div = article.find('h2', class_='entry-title')
        title = title_div.get_text(strip=True) if title_div else "No Title"

        date_div = article.find('div', class_='entry-date gt-meta')
        date = date_div.get_text(strip=True) if date_div else "No Date"

        link_tag = article.find('a')
        # .get() tolerates an <a> without an href instead of raising KeyError.
        href = link_tag.get('href') if link_tag else None

        if href:
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones (with or without a leading slash) against the site root.
            link = urljoin("https://golomtbank.com", href)
            # Without a timeout a stalled server would block the scrape forever.
            article_response = requests.get(link, timeout=timeout)
            article_response.raise_for_status()
            article_soup = BeautifulSoup(article_response.content, 'html.parser')
            article_content_div = article_soup.find('div', class_='entry-content')
            article_content = (
                article_content_div.get_text(strip=True)
                if article_content_div
                else "No content found"
            )
        else:
            # Nothing to download: keep the placeholders rather than
            # requesting a URL built from the "No Link" marker.
            link = "No Link"
            article_content = "No content found"

        article_data.append({
            'title': title,
            'date': date,
            'link': link,
            'content': article_content,
        })
    return article_data
|
394 |
-
|
395 |
def fetch_articles_from_page(url):
|
396 |
response = requests.get(url)
|
397 |
response.raise_for_status()
|
|
|
368 |
articles = soup.find_all('div', class_='entry-post gt-box-shadow-2')
|
369 |
return articles, soup
|
370 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
371 |
def fetch_articles_from_page(url):
|
372 |
response = requests.get(url)
|
373 |
response.raise_for_status()
|