Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,15 +23,15 @@ def parse_links_and_content(ort):
|
|
| 23 |
|
| 24 |
if page_number < last_two_chars_int:
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
|
| 36 |
# Gehe zur nächsten Seite
|
| 37 |
page_number += 1
|
|
@@ -81,4 +81,4 @@ with gr.Blocks() as demo:
|
|
| 81 |
button.click(fn=process_ort, inputs=ort_input, outputs=[links_output, content_output])
|
| 82 |
|
| 83 |
# Starte die Gradio-Anwendung
|
| 84 |
-
demo.launch()
|
|
|
|
| 23 |
|
| 24 |
if page_number < last_two_chars_int:
|
| 25 |
|
| 26 |
+
# Finde das Element mit dem CSS-Selektor
|
| 27 |
+
target_div = soup.select_one('div.row-cols-1:nth-child(4)')
|
| 28 |
|
| 29 |
+
if target_div:
|
| 30 |
+
# Extrahiere alle Links aus dem Element und füge die Base URL hinzu
|
| 31 |
+
links = [urljoin(base_url, a['href']) for a in target_div.find_all('a', href=True)]
|
| 32 |
+
all_links.extend(links)
|
| 33 |
+
else:
|
| 34 |
+
print(f"Target div not found on page {page_number}")
|
| 35 |
|
| 36 |
# Gehe zur nächsten Seite
|
| 37 |
page_number += 1
|
|
|
|
| 81 |
button.click(fn=process_ort, inputs=ort_input, outputs=[links_output, content_output])
|
| 82 |
|
| 83 |
# Starte die Gradio-Anwendung
|
| 84 |
+
demo.launch()
|