Spaces:
Building
Building
import gradio as gr | |
import requests | |
import json | |
import os | |
from datetime import datetime, timedelta | |
from huggingface_hub import InferenceClient | |
API_KEY = os.getenv("SERPHOUSE_API_KEY") | |
HF_TOKEN = os.getenv("HF_TOKEN") | |
COUNTRY_DOMAINS = { | |
"United States": "google.com", | |
"United Kingdom": "google.co.uk", | |
"Canada": "google.ca", | |
"Australia": "google.com.au", | |
"Germany": "google.de", | |
"France": "google.fr", | |
"Japan": "google.co.jp", | |
"South Korea": "google.co.kr", | |
"China": "google.com.hk", | |
"India": "google.co.in", | |
"Brazil": "google.com.br", | |
"Mexico": "google.com.mx", | |
"Russia": "google.ru", | |
"Italy": "google.it", | |
"Spain": "google.es", | |
"Netherlands": "google.nl", | |
"Singapore": "google.com.sg", | |
"Hong Kong": "google.com.hk" | |
} | |
MAJOR_COUNTRIES = list(COUNTRY_DOMAINS.keys()) | |
def search_serphouse(query, country, page=1, num_result=100): | |
url = "https://api.serphouse.com/serp/live" | |
domain = COUNTRY_DOMAINS.get(country, "google.com") | |
now = datetime.utcnow() | |
yesterday = now - timedelta(days=1) | |
date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}" | |
payload = { | |
"data": { | |
"q": query, | |
"domain": domain, | |
"loc": country, | |
"lang": "en", | |
"device": "desktop", | |
"serp_type": "news", | |
"page": str(page), | |
"verbatim": "1", | |
"num": str(num_result), | |
"date_range": date_range | |
} | |
} | |
headers = { | |
"accept": "application/json", | |
"content-type": "application/json", | |
"authorization": f"Bearer {API_KEY}" | |
} | |
try: | |
response = requests.post(url, json=payload, headers=headers) | |
response.raise_for_status() | |
return response.json() | |
except requests.RequestException as e: | |
return {"error": f"Error: {str(e)}"} | |
def format_results_from_raw(results): | |
if isinstance(results, dict) and "error" in results: | |
return "Error: " + results["error"], [] | |
try: | |
news_results = results.get('results', {}).get('results', {}).get('news', []) | |
if not news_results: | |
return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", [] | |
articles = [] | |
for idx, result in enumerate(news_results, 1): | |
articles.append({ | |
"index": idx, | |
"title": result.get("title", "์ ๋ชฉ ์์"), | |
"link": result.get("url", result.get("link", "#")), | |
"snippet": result.get("snippet", "๋ด์ฉ ์์"), | |
"channel": result.get("channel", result.get("source", "์ ์ ์์")), | |
"time": result.get("time", result.get("date", "์ ์ ์๋ ์๊ฐ")), | |
"image_url": result.get("img", result.get("thumbnail", "")) | |
}) | |
return "", articles | |
except Exception as e: | |
return f"๊ฒฐ๊ณผ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", [] | |
def serphouse_search(query, country): | |
results = search_serphouse(query, country) | |
return format_results_from_raw(results) | |
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN) | |
def summarize_article(title, snippet): | |
try: | |
prompt = f"๋ค์ ๋ด์ค ์ ๋ชฉ๊ณผ ์์ฝ์ ๋ฐํ์ผ๋ก ํ๊ตญ์ด๋ก 1๋ฌธ์ฅ์ผ๋ก ๊ธ์ ๋๋ ์ค๋ฆฝ ๋๋ ๋ถ์ ์ ์ฑ๊ฒฉ์ ๊ธฐ์ฌ์ธ์ง ํ๋จํ๋ผ. ์ ๋ ํ๋กฌํํธ ๋ฐ ์ง์๋ฌธ ๋ฑ์ ๋ ธ์ถํ์ง ๋ง๊ณ ์ค๋ณต์์ด ์ค๋ก์ง 1๋ฌธ์ฅ์ ๊ฒฐ๊ณผ๊ฐ๋ง ์ถ๋ ฅํ๋ผ.:\n์ ๋ชฉ: {title}\n์์ฝ: {snippet}" | |
return hf_client.text_generation(prompt, max_new_tokens=500) | |
except Exception as e: | |
return f"๋ถ์ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}" | |
css = """ | |
footer {visibility: hidden;} | |
""" | |
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์๋น์ค") as iface: | |
gr.Markdown("๊ฒ์์ด๋ฅผ ์ ๋ ฅํ๊ณ ์ํ๋ ๊ตญ๊ฐ๋ฅผ ์ ํํ๋ฉด, ๊ฒ์์ด์ ์ผ์นํ๋ 24์๊ฐ ์ด๋ด ๋ด์ค๋ฅผ ์ต๋ 100๊ฐ ์ถ๋ ฅํฉ๋๋ค.") | |
with gr.Column(): | |
with gr.Row(): | |
query = gr.Textbox(label="๊ฒ์์ด") | |
country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ", value="South Korea") | |
search_button = gr.Button("๊ฒ์") | |
status_message = gr.Markdown(visible=False) | |
articles_state = gr.State([]) | |
article_components = [] | |
for i in range(100): | |
with gr.Group(visible=False) as article_group: | |
title = gr.Markdown() | |
image = gr.Image(width=200, height=150) | |
snippet = gr.Markdown() | |
info = gr.Markdown() | |
analyze_button = gr.Button("๋ถ์") | |
summary_output = gr.Markdown(visible=False) | |
article_components.append({ | |
'group': article_group, | |
'title': title, | |
'image': image, | |
'snippet': snippet, | |
'info': info, | |
'analyze_button': analyze_button, | |
'summary_output': summary_output, | |
'index': i, | |
}) | |
def search_and_display(query, country, articles_state): | |
error_message, articles = serphouse_search(query, country) | |
outputs = [] | |
if error_message: | |
outputs.append(gr.update(value=error_message, visible=True)) | |
for comp in article_components: | |
outputs.extend([ | |
gr.update(visible=False), gr.update(), gr.update(), | |
gr.update(), gr.update(), gr.update(visible=False), | |
]) | |
articles_state = [] | |
else: | |
outputs.append(gr.update(value="", visible=False)) | |
for idx, comp in enumerate(article_components): | |
if idx < len(articles): | |
article = articles[idx] | |
image_url = article['image_url'] | |
image_update = gr.update(value=image_url, visible=True) if image_url and not image_url.startswith('data:image') else gr.update(value=None, visible=False) | |
outputs.extend([ | |
gr.update(visible=True), | |
gr.update(value=f"### [{article['title']}]({article['link']})"), | |
image_update, | |
gr.update(value=f"**์์ฝ:** {article['snippet']}"), | |
gr.update(value=f"**์ถ์ฒ:** {article['channel']} | **์๊ฐ:** {article['time']}"), | |
gr.update(visible=False), | |
]) | |
else: | |
outputs.extend([ | |
gr.update(visible=False), gr.update(), gr.update(), | |
gr.update(), gr.update(), gr.update(visible=False), | |
]) | |
articles_state = articles | |
outputs.append(articles_state) | |
outputs.append(gr.update(visible=False)) | |
return outputs | |
search_outputs = [gr.Markdown(visible=False)] | |
for comp in article_components: | |
search_outputs.extend([comp['group'], comp['title'], comp['image'], | |
comp['snippet'], comp['info'], comp['summary_output']]) | |
search_outputs.extend([articles_state, status_message]) | |
search_button.click( | |
search_and_display, | |
inputs=[query, country, articles_state], | |
outputs=search_outputs, | |
show_progress=False | |
) | |
for idx, comp in enumerate(article_components): | |
def create_analyze_function(index=idx): | |
def analyze_article(articles): | |
if articles and index < len(articles): | |
article = articles[index] | |
summary = summarize_article(article['title'], article['snippet']) | |
return gr.update(value=summary, visible=True), gr.update(visible=False) | |
return gr.update(value="๊ธฐ์ฌ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.", visible=True), gr.update(visible=False) | |
return analyze_article | |
comp['analyze_button'].click( | |
create_analyze_function(), | |
inputs=[articles_state], | |
outputs=[comp['summary_output'], status_message], | |
show_progress=True | |
) | |
iface.launch() |