Spaces:

mgokg
/

gemini-2.0-flash-exp

Running

App Files Files Community

mgokg committed on Dec 2, 2024

Commit

e410dd0

verified ·

1 Parent(s): a225c2f

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -36

app.py CHANGED Viewed

@@ -5,41 +5,32 @@ from urllib.parse import urljoin
 def parse_links_and_content(ort):
     base_url = "https://vereine-in-deutschland.net"
-    all_links = []
-    # Start with the first page
-    page_number = 1
-    while True:
-        # Konstruiere die vollständige URL für die aktuelle Seite
-        url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
-        try:
-            # Senden der Anfrage an die URL
-            response = requests.get(url)
-            response.raise_for_status()  # Überprüfen, ob die Anfrage erfolgreich war
-            # Parse the HTML content using BeautifulSoup
-            soup = BeautifulSoup(response.content, 'html.parser')
-            if page_number < last_two_chars_int:
-                # Finde das Element mit dem CSS-Selektor
-                target_div = soup.select_one('div.row-cols-1:nth-child(4)')
-                if target_div:
-                    # Extrahiere alle Links aus dem Element und füge die Base URL hinzu
-                    links = [urljoin(base_url, a['href']) for a in target_div.find_all('a', href=True)]
-                    all_links.extend(links)
-                else:
-                    print(f"Target div not found on page {page_number}")
-            # Gehe zur nächsten Seite
-            page_number += 1
-        except Exception as e:
-            print(f"Error on page {page_number}: {str(e)}")
-            break
-    return all_links
 def scrape_links(links):
     results = []
@@ -66,19 +57,20 @@ with gr.Blocks() as demo:
     gr.Markdown("# Vereine in Bayern Parser")
     ort_input = gr.Textbox(label="Ort", placeholder="Gib den Namen des Ortes ein")
     links_output = gr.JSON(label="Gefundene Links")
     content_output = gr.JSON(label="Inhalt der Links")
     def process_ort(ort):
-        all_links = parse_links_and_content(ort)
-        scraped_content = scrape_links(all_links)
-        return all_links, scraped_content
     # Button zum Starten der Parsung
     button = gr.Button("Parse und Scrape")
     # Verbinde den Button mit der Funktion
-    button.click(fn=process_ort, inputs=ort_input, outputs=[links_output, content_output])
 # Starte die Gradio-Anwendung
-demo.launch()

 def parse_links_and_content(ort):
     base_url = "https://vereine-in-deutschland.net"
+    # Konstruiere die vollständige URL
+    url = f"{base_url}/vereine/Bayern/{ort}"
+    try:
+        # Senden der Anfrage an die URL
+        response = requests.get(url)
+        response.raise_for_status()  # Überprüfen, ob die Anfrage erfolgreich war
+        # Parse the HTML content using BeautifulSoup
+        soup = BeautifulSoup(response.content, 'html.parser')
+        # Finde das Element mit dem CSS-Selektor
+        target_div = soup.select_one('div.row-cols-1:nth-child(4)')
+        if target_div:
+            # Extrahiere alle Links aus dem Element und füge die Base URL hinzu
+            links = [urljoin(base_url, a['href']) for a in target_div.find_all('a', href=True)]
+            # Extrahiere den HTML-Code des Elements
+            html_code = str(target_div)
+            return html_code, links
+        else:
+            return "Target div not found", []
+    except Exception as e:
+        return str(e), []
 def scrape_links(links):
     results = []
     gr.Markdown("# Vereine in Bayern Parser")
     ort_input = gr.Textbox(label="Ort", placeholder="Gib den Namen des Ortes ein")
+    html_output = gr.Code(label="HTML-Code des Elements", language="html")
     links_output = gr.JSON(label="Gefundene Links")
     content_output = gr.JSON(label="Inhalt der Links")
     def process_ort(ort):
+        html_code, links = parse_links_and_content(ort)
+        scraped_content = scrape_links(links)
+        return html_code, links, scraped_content
     # Button zum Starten der Parsung
     button = gr.Button("Parse und Scrape")
     # Verbinde den Button mit der Funktion
+    button.click(fn=process_ort, inputs=ort_input, outputs=[html_output, links_output, content_output])
 # Starte die Gradio-Anwendung
+demo.launch()