Spaces:

mgokg
/

gemini-2.0-flash-exp

Running

App Files Files Community

mgokg committed on Dec 2, 2024

Commit

5535edf

verified ·

1 Parent(s): 81460dc

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -16

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import gradio as gr
 import requests
 from bs4 import BeautifulSoup
-def parse_link(ort):
     # Konstruiere die vollständige URL
     url = f"https://vereine-in-deutschland.net/vereine/Bayern/{ort}"
@@ -14,34 +14,61 @@ def parse_link(ort):
         # Parse the HTML content using BeautifulSoup
         soup = BeautifulSoup(response.content, 'html.parser')
-        # Finde den Link mit dem CSS-Selektor
-        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
-        if link_element and 'href' in link_element.attrs:
-            href = link_element['href']
-            # Extrahiere die letzten beiden Zeichen der URL
-            last_two_chars = href[-2:]
-            # Konvertiere die letzten beiden Zeichen in einen Integer
-            last_two_chars_int = int(last_two_chars)
-            return last_two_chars_int
         else:
-            return "Link not found"
     except Exception as e:
-        return str(e)
 # Erstelle die Gradio-Schnittstelle
 with gr.Blocks() as demo:
     gr.Markdown("# Vereine in Bayern Parser")
-    ort_input = gr.Textbox(label="Ort", placeholder="Gib den Namen des Ortes ein")
-    output = gr.Number(label="Letzte beiden Zeichen der URL (als Integer)")
     # Button zum Starten der Parsung
-    button = gr.Button("Parse Link")
     # Verbinde den Button mit der Funktion
-    button.click(fn=parse_link, inputs=ort_input, outputs=output)
 # Starte die Gradio-Anwendung
 demo.launch()

 import requests
 from bs4 import BeautifulSoup
+def parse_links_and_content(ort):
     # Konstruiere die vollständige URL
     url = f"https://vereine-in-deutschland.net/vereine/Bayern/{ort}"
         # Parse the HTML content using BeautifulSoup
         soup = BeautifulSoup(response.content, 'html.parser')
+        # Finde das Element mit dem CSS-Selektor
+        target_div = soup.select_one('div.row-cols-1:nth-child(4)')
+        if target_div:
+            # Extrahiere alle Links aus dem Element
+            links = [a['href'] for a in target_div.find_all('a', href=True)]
+            # Extrahiere den HTML-Code des Elements
+            html_code = str(target_div)
+            return html_code, links
         else:
+            return "Target div not found", []
     except Exception as e:
+        return str(e), []
+def scrape_links(links):  # Fetch every URL in `links`; return a list of (link, page title or error text) tuples
+    results = []
+    for link in links:
+        try:
+            # Send the request to the URL
+            response = requests.get(link)  # NOTE(review): no timeout argument is passed here
+            response.raise_for_status()  # Check that the request succeeded
+            # Parse the HTML content using BeautifulSoup
+            soup = BeautifulSoup(response.content, 'html.parser')
+            # Extract the desired content (here, as an example, the page title)
+            content = soup.title.string if soup.title else "No title found"
+            results.append((link, content))
+        except Exception as e:  # a failure is stored as its message text and the loop moves on to the next link
+            results.append((link, str(e)))
+    return results
 # Build the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Vereine in Bayern Parser")
+    ort_input = gr.Textbox(label="Ort", placeholder="Gib den Namen des Ortes ein")
+    html_output = gr.Code(label="HTML-Code des Elements", language="html")
+    links_output = gr.JSON(label="Gefundene Links")
+    content_output = gr.JSON(label="Inhalt der Links")
+    def process_ort(ort):  # Click handler: returns one value for each of the three output components
+        html_code, links = parse_links_and_content(ort)
+        scraped_content = scrape_links(links)
+        return html_code, links, scraped_content
     # Button that starts the parsing
+    button = gr.Button("Parse und Scrape")
     # Connect the button to the function
+    button.click(fn=process_ort, inputs=ort_input, outputs=[html_output, links_output, content_output])
 # Launch the Gradio application
 demo.launch()