import gradio as gr
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import os
import json
import csv
import pandas as pd
from io import StringIO
import google.generativeai as genai
# Read API keys from environment variables
genai.configure(api_key=os.environ["geminiapikey"])  # raises KeyError if the secret is missing
api_key = os.environ.get('GROQ_API_KEY')
read_key = os.environ.get('HF_TOKEN', None)

# Initialize the Groq client only if an API key is set
if api_key:
    from groq import Client as GroqClient
    client = GroqClient(api_key=api_key)
else:
    client = None
# Use Llama 3 70B powered by Groq for answering
def ask_llm(ort):
    if not client:
        return "Groq API key not set."
    try:
        completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"{ort}. \n instruction: antworte kurz und knapp. antworte immer auf deutsch"},
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"
def parse_links_and_content(ort):
    base_url = "https://vereine-in-deutschland.net"
    all_links = []
    all_links_text = []
    initial_url = f"{base_url}/vereine/Bayern/{ort}"
    try:
        response = requests.get(initial_url)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Determine the last page from the pagination element;
        # fall back to 10 pages if it cannot be found
        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
        last_page = 10
        if link_element and 'href' in link_element.attrs:
            href = link_element['href']
            last_page = int(href.split('/')[-1])

        # Loop through all pages and collect links
        for page_number in range(1, last_page + 1):
            page_url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
            response = requests.get(page_url)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_div = soup.select_one('div.row-cols-1:nth-child(4)')
            if target_div:
                links = [urljoin(base_url, a['href']) for a in target_div.find_all('a', href=True)]
                texts = [a.text for a in target_div.find_all('a', href=True)]
                all_links.extend(links)
                all_links_text.extend(texts)
            else:
                print(f"Target div not found on page {page_number}")
    except Exception as e:
        return str(e), []

    # The listing apparently contains each entry twice,
    # so keep every second item to deduplicate
    all_links = all_links[0::2]
    all_links_text = all_links_text[0::2]
    return all_links_text, all_links
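
# Illustrative return shape (hypothetical values):
# parse_links_and_content("Bad Kissingen") ->
#   (["SV Eltingshausen", ...],
#    ["https://vereine-in-deutschland.net/vereine/Bayern/.../sv-eltingshausen", ...])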
def extract_vereinsname(url):
    # The Verein name is the last URL segment, with dashes turned into spaces
    parts = url.split('/')
    vereinsname = parts[-1]
    vereinsname = vereinsname.replace("-", " ")
    return vereinsname
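
# Example: extract_vereinsname("https://vereine-in-deutschland.net/.../sv-eltingshausen")
# returns "sv eltingshausen".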
def scrape_links(links):
    details = []
    for link in links:
        try:
            response = requests.get(link)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_nav = soup.select_one('.nav')
            parts = link.split('/')

            # Log the URL and its parts for debugging
            print(f"Processing URL: {link}")
            print(f"URL parts: {parts}")

            # Extract the name of the Verein from the URL; fall back to the
            # second-to-last part if the URL ends with a trailing slash
            vereinsname = parts[-1] if parts[-1] else parts[-2]

            if target_nav:
                # Normalize the scraped contact block before storing it
                # (the .text access must happen inside this check, otherwise a
                # missing nav element raises an AttributeError)
                texte = target_nav.text.strip()
                texte = texte.replace("Amtsgericht: Schweinfurt", "")
                texte = texte.replace("Adresse folgt", "")
                texte = texte.replace("Adresse", "Adresse:")
                texte = texte.replace("Kontakt", "Email:")
                texte = texte.replace("Noch keine Daten vorhanden", "")
                details.append(f"Verein: {vereinsname} {texte}")
            else:
                details.append(f"Verein: {vereinsname} - No contact information found")
        except Exception as e:
            details.append(f"Error: {str(e)}")
    return details
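
# Illustrative entry (hypothetical data):
# "Verein: sv-eltingshausen Adresse: Musterweg 1 ... Email: vorstand@sv-eltingshausen.de"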
def save_to_csv(data, filename):
    # Expects a list of dicts; the keys of the first row become the CSV header
    keys = data[0].keys() if data else []
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)
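
# Illustrative usage (hypothetical keys and values):
# save_to_csv([{"verein": "sv eltingshausen", "email": "vorstand@sv-eltingshausen.de"}], "verein.csv")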
# Clear the output textbox (one value for the single connected output)
def clear():
    return ""

def load_data():
    # Note: `df` is only created inside process_ort; return an empty
    # DataFrame if it does not exist yet instead of raising a NameError
    return globals().get('df', pd.DataFrame())
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("[Download](https://specialist-it.de/verein.csv)")

    with gr.Row():
        ort_input = gr.Textbox(label="Ort", placeholder="Gib den Namen des Ortes ein")
    with gr.Row():
        #details_output = gr.DataFrame(label="Ausgabe")
        details_output = gr.Textbox(label="Ausgabe", value="\n\n\n\n")
    def process_ort(ort):
        links_text, links = parse_links_and_content(ort)
        #return links_text
        kontakt = []

        # Configure the Gemini model
        generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }
        model = genai.GenerativeModel(
            model_name="gemini-2.0-flash-exp",
            generation_config=generation_config,
        )
        chat_session = model.start_chat(history=[])

        # Ask Gemini for the contact details of each Verein
        for verein in links_text:
            response = chat_session.send_message(f"Kontaktdaten für {verein}")
            kontakt.append(response.text)
        return kontakt
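
        # Illustrative result (hypothetical values): kontakt is a list of
        # free-text answers, e.g. ["SV Eltingshausen: Musterweg 1, ...", ...]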
contact_details = scrape_links(links)
from gradio_client import Client
qwen_client = Client("Qwen/Qwen2.5-72B-Instruct")
result = qwen_client.predict(
query=f"return a valid json objects with contact details foreach verein. return the generated json only \n {contact_details}",
history=[],
system="you are a expert for json data and your job is to extract information from text and return a valid json object only. no text no explanations. reverse all email adresses. example: reverse ed.nesuahsgnitle-vs@dnatsrov to [email protected]. replace all dashes from vereinsname with a whitespace",
api_name="/model_chat"
)
json_data = result[1][0][1]
json_data = json_data.replace("```json", "")
json_data = json_data.replace("```", "")
# Convert JSON string to Python dictionary
data_dict = json.loads(json_data)
# Convert dictionary to DataFrame
df = pd.DataFrame(data_dict)
# DataFrame in eine CSV-Datei konvertieren
#df.to_csv('daten.csv', index=False)
# DataFrame in eine CSV-Variable konvertieren
csv_buffer = StringIO()
df.to_csv(csv_buffer, index=False)
csv_data = csv_buffer.getvalue()
print(csv_data)
#return csv_data
return df
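
    # Illustrative (hypothetical) DataFrame from the Qwen path:
    #   vereinsname         adresse          email
    #   sv eltingshausen    Musterweg 1 ...  vorstand@sv-eltingshausen.de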
    with gr.Row():
        clearbutton = gr.Button("Clear")
        button = gr.Button("Senden")

    # Connect the buttons to their handlers
    button.click(fn=process_ort, inputs=ort_input, outputs=details_output)
    clearbutton.click(fn=clear, inputs=[], outputs=details_output)

# Launch the Gradio application
demo.launch()