import gradio as gr
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import os
import json
import csv

# Load API keys from environment variables
api_key = os.environ.get('groq')
read_key = os.environ.get('HF_TOKEN', None)  # read token; currently unused

# Initialize the Groq client only if an API key is available
if api_key:
    from groq import Groq
    client = Groq(api_key=api_key)
else:
    client = None

# Ask Llama 3 70B (served by Groq) a free-form question.
# Note: this helper is defined but not wired into the Gradio UI below.
def ask_llm(ort):
    if not client:
        return "Groq API key not set."

    try:
        completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                # The user prompt asks the model to answer briefly and always in German
                {"role": "user", "content": f"{ort}. \n instruction: antworte kurz und knapp. antworte immer auf deutsch"}
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"

def parse_links_and_content(ort):
    base_url = "https://vereine-in-deutschland.net"
    all_links = []
    all_links_text = []
    initial_url = f"{base_url}/vereine/Bayern/{ort}"

    try:
        response = requests.get(initial_url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Determine the last page from the pagination bar; fall back to 10 pages
        # if the expected element is missing or its href has an unexpected shape
        last_page = 10
        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
        if link_element and 'href' in link_element.attrs:
            try:
                last_page = int(link_element['href'].split('/')[-1])
            except ValueError:
                pass

        # Loop through all pages and collect links
        for page_number in range(1, last_page + 1):
            page_url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
            response = requests.get(page_url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_div = soup.select_one('div.row-cols-1:nth-child(4)')

            if target_div:
                anchors = target_div.find_all('a', href=True)
                all_links.extend(urljoin(base_url, a['href']) for a in anchors)
                all_links_text.extend(a.text for a in anchors)
            else:
                print(f"Target div not found on page {page_number}")

    except Exception as e:
        # On failure, return the error message in place of the link texts
        return str(e), []

    # Each club appears with two identical anchors, so keep every other entry
    all_links = all_links[0::2]
    all_links_text = all_links_text[0::2]

    return all_links_text, all_links
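# Illustrative return shape (the names are made up): (["TSV Beispielstadt e.V.", ...],
# ["https://vereine-in-deutschland.net/vereine/...", ...])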

def scrape_links(links):
    details = []
    for link in links:
        try:
            response = requests.get(link, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_nav = soup.select_one('.nav')
            if target_nav:
                details.append({"link": link, "contact": target_nav.text.strip()})
            else:
                details.append({"link": link, "contact": "No contact information found"})
        except Exception as e:
            details.append({"link": link, "contact": f"Error: {str(e)}"})

    # Return structured records so they can be written to CSV and shown as JSON
    return details

def save_to_csv(data, filename):
    # Expects a list of dicts sharing a common set of keys
    if not data:
        return
    keys = data[0].keys()
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)
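# Hypothetical example: save_to_csv([{"link": "https://example.org", "contact": "info@example.org"}],
# './contact_details.csv') writes a CSV with the header row "link,contact".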

# Clear both output fields
def clear():
    return None, None

# Create the Gradio interface
with gr.Blocks() as demo:
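    # Download badge linking to a pre-generated CSV hosted externally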
    gr.Markdown("[![Download](https://specialist-it.de/downloadbut.png)](https://specialist-it.de/verein.csv)")
    with gr.Row():
        ort_input = gr.Textbox(label="Ort", placeholder="Gib den Namen des Ortes ein")
    with gr.Row():
        details_output = gr.JSON(label="Kontaktinformation")
        links_output = gr.JSON(label="Vereinsliste")

    def process_ort(ort):
        # Collect club names and links, scrape contact info, save it, and display it
        links_text, links = parse_links_and_content(ort)
        contact_details = scrape_links(links)
        save_to_csv(contact_details, './contact_details.csv')
        return links_text, contact_details

    # Buttons to clear the output and to start the scraping
    with gr.Row():
        clearbutton = gr.Button("clear")
        button = gr.Button("senden")

    # Connect the buttons to their functions
    button.click(fn=process_ort, inputs=ort_input, outputs=[links_output, details_output])
    clearbutton.click(fn=clear, inputs=[], outputs=[links_output, details_output])
# Launch the Gradio application
demo.launch()
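
# Usage sketch: set the `groq` environment variable to a Groq API key, then run
# `python app.py`; Gradio serves the UI on http://127.0.0.1:7860 by default.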