"""Gradio news-search app.

The user enters a query and picks a country; the query is translated into
that country's language with a hosted LLM and then sent to the SERPHouse
live SERP API as a Google News search. Up to 100 results are rendered in
pre-built, initially hidden article slots.
"""

import gradio as gr
import requests
import json
import os
from datetime import datetime, timedelta
from huggingface_hub import InferenceClient

API_KEY = os.getenv("SERPHOUSE_API_KEY")
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN")
)

# Seconds to wait for the SERPHouse API before giving up, so a stalled
# connection cannot block the UI callback forever.
REQUEST_TIMEOUT = 60

# Number of article slots pre-built in the UI; results beyond this are dropped.
MAX_ARTICLE_SLOTS = 100

# Display name -> country code sent to the search API.
# Insertion order is the order shown in the country dropdown.
COUNTRY_CODES = {
    "United States": "US",
    "United Kingdom": "GB",
    "Canada": "CA",
    "Australia": "AU",
    "Germany": "DE",
    "France": "FR",
    "Japan": "JP",
    "South Korea": "KR",
    "China": "CN",
    "Taiwan": "TW",  # Taiwan added
    "India": "IN",
    "Brazil": "BR",
    "Mexico": "MX",
    "Russia": "RU",
    "Italy": "IT",
    "Spain": "ES",
    "Netherlands": "NL",
    "Singapore": "SG",
    "Hong Kong": "HK",
    "Indonesia": "ID",
    "Malaysia": "MY",
    "Philippines": "PH",
    "Thailand": "TH",
    "Vietnam": "VN",
    "Belgium": "BE",
    "Denmark": "DK",
    "Finland": "FI",
    "Ireland": "IE",
    "Norway": "NO",
    "Poland": "PL",
    "Sweden": "SE",
    "Switzerland": "CH",
    "Austria": "AT",
    "Czech Republic": "CZ",
    "Greece": "GR",
    "Hungary": "HU",
    "Portugal": "PT",
    "Romania": "RO",
    "Turkey": "TR",
    "Israel": "IL",
    "Saudi Arabia": "SA",
    "United Arab Emirates": "AE",
    "South Africa": "ZA",
    "Argentina": "AR",
    "Chile": "CL",
    "Colombia": "CO",
    "Peru": "PE",
    "Venezuela": "VE",
    "New Zealand": "NZ",
    "Bangladesh": "BD",
    "Pakistan": "PK",
    "Egypt": "EG",
    "Morocco": "MA",
    "Nigeria": "NG",
    "Kenya": "KE",
    "Ukraine": "UA",
    "Croatia": "HR",
    "Slovakia": "SK",
    "Bulgaria": "BG",
    "Serbia": "RS",
    "Estonia": "EE",
    "Latvia": "LV",
    "Lithuania": "LT",
    "Slovenia": "SI",
    "Luxembourg": "LU",
    "Malta": "MT",
    "Cyprus": "CY",
    "Iceland": "IS",
}


def is_english(text):
    """Return True when every non-space character of *text* is ASCII.

    Note that an empty string (or one made only of spaces) also returns True.
    """
    return all(ord(char) < 128 for char in text.replace(' ', ''))


# Display name -> language code used both as the translation target and as
# the "lang" field of the search request. Countries not listed fall back to
# "en" in the search request and are not translated.
COUNTRY_LANGUAGES = {
    "South Korea": "ko",
    "Japan": "ja",
    "China": "zh",
    "Taiwan": "zh-tw",  # Taiwanese (Traditional Chinese) added
    "Russia": "ru",
    "France": "fr",
    "Germany": "de",
    "Spain": "es",
    "Italy": "it",
    "Netherlands": "nl",
    "Portugal": "pt",
    "Thailand": "th",
    "Vietnam": "vi",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "Egypt": "ar",
    "Morocco": "ar",
    "Greece": "el",
    "Poland": "pl",
    "Czech Republic": "cs",
    "Hungary": "hu",
    "Turkey": "tr",
    "Romania": "ro",
    "Bulgaria": "bg",
    "Croatia": "hr",
    "Serbia": "sr",
    "Slovakia": "sk",
    "Slovenia": "sl",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Ukraine": "uk",
    "Israel": "he",
    "Bangladesh": "bn",
    "Pakistan": "ur",
    "Finland": "fi",
    "Denmark": "da",
    "Norway": "no",
    "Sweden": "sv",
    "Iceland": "is",
    "Philippines": "fil",
    "Brazil": "pt-br",
    "Argentina": "es-ar",
    "Chile": "es-cl",
    "Colombia": "es-co",
    "Peru": "es-pe",
    "Venezuela": "es-ve",
}


def translate_query(query, country):
    """Translate *query* into the language associated with *country*.

    The query is returned untranslated (cut to 255 characters) when it is
    ASCII-only, when the country is "South Korea", when the country has no
    entry in COUNTRY_LANGUAGES, or when translation fails or yields nothing.

    Args:
        query: Search text typed by the user.
        country: Country display name (a key of COUNTRY_CODES).

    Returns:
        The query to send to the search API, at most 255 characters long.
    """
    try:
        if is_english(query):
            print(f"English query detected, using original: {query}")
            return query[:255]

        if country == "South Korea":
            return query[:255]

        if country in COUNTRY_LANGUAGES:
            # Only the first 100 characters are sent to the model.
            text = query[:100]
            target_lang = COUNTRY_LANGUAGES[country]

            prompt = (
                f"Translate this text to {target_lang} language. "
                "For Japanese, use Kanji and Kana. "
                "For Chinese (China), use Simplified Chinese. "
                "For Chinese (Taiwan), use Traditional Chinese. "
                "For Korean, use Hangul. "
                "Only output the translated text without any explanation. \n"
                f"Text to translate: {text}"
            )

            translated = hf_client.text_generation(
                prompt,
                max_new_tokens=50,
                temperature=0.1
            )
            translated = translated.strip()[:255]

            print(f"Original query: {text}")
            print(f"Translated query: {translated}")

            # An empty model answer would turn into an empty search; keep
            # the user's own text instead.
            if not translated:
                return query[:255]
            return translated

        return query[:255]

    except Exception as e:
        # Best effort: any translation failure falls back to the raw query.
        print(f"Translation error: {str(e)}")
        return query[:255]


# Countries offered in the dropdown (all keys of COUNTRY_CODES).
MAJOR_COUNTRIES = list(COUNTRY_CODES.keys())


def search_serphouse(query, country, page=1, num_result=10, translated_query=None):
    """Run a Google News search through the SERPHouse live API.

    Args:
        query: Original search text.
        country: Country display name; selects the country code and language
            of the request (defaults "US" / "en" when unknown).
        page: Result page number to request.
        num_result: Number of results to request.
        translated_query: Already-translated query. When None the query is
            translated here with translate_query().

    Returns:
        On success ``{"results": <decoded JSON>, "translated_query": str}``;
        on a request failure ``{"error": str, "translated_query": str}``.
    """
    url = "https://api.serphouse.com/serp/live"

    # Translate only when the caller has not already done so, so one search
    # triggers one translation call.
    if translated_query is None:
        translated_query = translate_query(query, country)
    print(f"Original query: {query}")
    print(f"Translated query: {translated_query}")

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "country_code": COUNTRY_CODES.get(country, "US"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": str(num_result)
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    # Pre-bind so the except branch never touches an unbound name when
    # requests.post() itself raises (e.g. connection error or timeout).
    response = None
    try:
        response = requests.post(
            url, json=payload, headers=headers, timeout=REQUEST_TIMEOUT
        )
        print("Request payload:", json.dumps(payload, indent=2, ensure_ascii=False))
        print("Response status:", response.status_code)

        response.raise_for_status()
        return {"results": response.json(), "translated_query": translated_query}
    except requests.RequestException as e:
        error_msg = f"Error: {str(e)}"
        # Compare with None explicitly: a Response object is falsy for
        # 4xx/5xx statuses, which is exactly when its body is wanted.
        failed_response = getattr(e, "response", None)
        if failed_response is None:
            failed_response = response
        if failed_response is not None:
            error_msg += f"\nResponse content: {failed_response.text}"
        return {"error": error_msg, "translated_query": translated_query}


def format_results_from_raw(response_data):
    """Convert a search_serphouse() result into ``(error_message, articles)``.

    Args:
        response_data: Dict returned by search_serphouse().

    Returns:
        A tuple ``(message, articles)``. ``message`` is "" on success and a
        user-facing text otherwise (including the "no results" case);
        ``articles`` is a list of dicts with the keys index, title, link,
        snippet, channel, time, image_url and translated_query.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "검색 결과가 없습니다.", []

        # Each field tries a primary key first and an alternative key second.
        articles = [
            {
                "index": idx,
                "title": result.get("title", "제목 없음"),
                "link": result.get("url", result.get("link", "#")),
                "snippet": result.get("snippet", "내용 없음"),
                "channel": result.get("channel", result.get("source", "알 수 없음")),
                "time": result.get("time", result.get("date", "알 수 없는 시간")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query
            }
            for idx, result in enumerate(news_results, 1)
        ]
        return "", articles
    except Exception as e:
        # UI boundary: report unexpected response shapes instead of crashing.
        return f"결과 처리 중 오류 발생: {str(e)}", []


def serphouse_search(query, country):
    """Search and format in one step; returns ``(error_message, articles)``."""
    response_data = search_serphouse(query, country)
    return format_results_from_raw(response_data)


css = """ footer {visibility: hidden;} """

# NOTE(review): the source this was reconstructed from had lost its
# indentation; the nesting of widgets inside the Column below is inferred.
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI 서비스") as iface:
    gr.Markdown("검색어를 입력하고 원하는 국가를 선택하면, 해당 국가의 언어로 번역된 검색어로 뉴스를 검색합니다.")

    with gr.Column():
        with gr.Row():
            query = gr.Textbox(label="검색어")
            country = gr.Dropdown(MAJOR_COUNTRIES, label="국가", value="South Korea")

        # Shows the original / translated query for the last search.
        translated_display = gr.Markdown(visible=True)

        search_button = gr.Button("검색", variant="primary")
        progress = gr.Progress()
        status_message = gr.Markdown(visible=False)
        articles_state = gr.State([])

        # Pre-build hidden slots; a search fills and reveals as many as needed.
        article_components = []
        for i in range(MAX_ARTICLE_SLOTS):
            with gr.Group(visible=False) as article_group:
                title = gr.Markdown()
                image = gr.Image(width=200, height=150)
                snippet = gr.Markdown()
                info = gr.Markdown()

            article_components.append({
                'group': article_group,
                'title': title,
                'image': image,
                'snippet': snippet,
                'info': info,
                'index': i,
            })

    def search_and_display(query, country, articles_state, progress=gr.Progress()):
        """Button callback: translate, search, and build all component updates.

        Returns a list whose order matches ``search_outputs``: translated
        query display, error display, five updates per article slot, the new
        articles state, and the status message update.
        """
        progress(0, desc="검색 시작...")

        # Translate once; the same text is displayed and used for the search.
        translated_query = translate_query(query, country)
        if is_english(query):
            translated_display_text = f"영어 검색어: {query}"
        elif country == "South Korea":
            translated_display_text = f"검색어: {query}"
        elif translated_query != query:
            translated_display_text = f"원본 검색어: {query}\n번역된 검색어: {translated_query}"
        else:
            translated_display_text = f"검색어: {query}"

        progress(0.2, desc="검색 중...")
        response_data = search_serphouse(
            query, country, translated_query=translated_query
        )
        error_message, articles = format_results_from_raw(response_data)

        outputs = [gr.update(value=translated_display_text, visible=True)]

        if error_message:
            outputs.append(gr.update(value=error_message, visible=True))
            # Hide every slot, leaving its contents untouched.
            for _ in article_components:
                outputs.extend([
                    gr.update(visible=False), gr.update(), gr.update(),
                    gr.update(), gr.update()
                ])
            articles_state = []
        else:
            outputs.append(gr.update(value="", visible=False))
            total_articles = len(articles)
            for idx, _ in enumerate(article_components):
                if idx < total_articles:
                    # Report progress only for real articles so the fraction
                    # never exceeds 1.0.
                    progress(
                        (idx + 1) / total_articles,
                        desc=f"결과 표시 중... {idx + 1}/{total_articles}"
                    )
                    article = articles[idx]
                    image_url = article['image_url']
                    # Inline data: URIs and missing images are not displayed.
                    if image_url and not image_url.startswith('data:image'):
                        image_update = gr.update(value=image_url, visible=True)
                    else:
                        image_update = gr.update(value=None, visible=False)

                    outputs.extend([
                        gr.update(visible=True),
                        gr.update(value=f"### [{article['title']}]({article['link']})"),
                        image_update,
                        gr.update(value=f"**요약:** {article['snippet']}"),
                        gr.update(value=f"**출처:** {article['channel']} | **시간:** {article['time']}")
                    ])
                else:
                    outputs.extend([
                        gr.update(visible=False), gr.update(), gr.update(),
                        gr.update(), gr.update()
                    ])
            articles_state = articles

        progress(1.0, desc="완료!")
        outputs.append(articles_state)
        outputs.append(gr.update(visible=False))
        return outputs

    # Components updated by a search, in the order search_and_display returns
    # them. The error display is created here, i.e. below the article slots.
    error_display = gr.Markdown(visible=False)
    search_outputs = [translated_display, error_display]
    for comp in article_components:
        search_outputs.extend([
            comp['group'], comp['title'], comp['image'],
            comp['snippet'], comp['info']
        ])
    search_outputs.extend([articles_state, status_message])

    # Wire the search button to the callback.
    search_button.click(
        fn=search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs,
        show_progress=True
    )

iface.launch()