openfree committed on
Commit
bc3552e
·
verified ·
1 Parent(s): 69fda0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -12
app.py CHANGED
@@ -471,19 +471,35 @@ def format_results_from_raw(response_data):
471
  if not news_results:
472
  return "검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.", []
473
 
474
- articles = []
 
 
 
 
 
 
475
  for idx, result in enumerate(news_results, 1):
476
- articles.append({
477
- "index": idx,
478
- "title": result.get("title", "제λͺ© μ—†μŒ"),
479
- "link": result.get("url", result.get("link", "#")),
480
- "snippet": result.get("snippet", "λ‚΄μš© μ—†μŒ"),
481
- "channel": result.get("channel", result.get("source", "μ•Œ 수 μ—†μŒ")),
482
- "time": result.get("time", result.get("date", "μ•Œ 수 μ—†λŠ” μ‹œκ°„")),
483
- "image_url": result.get("img", result.get("thumbnail", "")),
484
- "translated_query": translated_query
485
- })
486
- return "", articles
 
 
 
 
 
 
 
 
 
 
487
  except Exception as e:
488
  return f"κ²°κ³Ό 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}", []
489
 
 
471
  if not news_results:
472
  return "검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.", []
473
 
474
+ # 한국 도메인 및 한국 관련 키워드 필터링
475
+ korean_domains = ['.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
476
+ 'donga', 'joins', 'hani', 'koreatimes', 'koreaherald']
477
+ korean_keywords = ['korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
478
+ 'gwangju', 'daejeon', 'ulsan', 'sejong']
479
+
480
+ filtered_articles = []
481
  for idx, result in enumerate(news_results, 1):
482
+ url = result.get("url", result.get("link", "")).lower()
483
+ title = result.get("title", "").lower()
484
+ channel = result.get("channel", result.get("source", "")).lower()
485
+
486
+ # 한국 관련 컨텐츠 필터링
487
+ is_korean_content = any(domain in url or domain in channel for domain in korean_domains) or \
488
+ any(keyword in title.lower() for keyword in korean_keywords)
489
+
490
+ if not is_korean_content:
491
+ filtered_articles.append({
492
+ "index": idx,
493
+ "title": result.get("title", "제λͺ© μ—†μŒ"),
494
+ "link": url,
495
+ "snippet": result.get("snippet", "λ‚΄μš© μ—†μŒ"),
496
+ "channel": result.get("channel", result.get("source", "μ•Œ 수 μ—†μŒ")),
497
+ "time": result.get("time", result.get("date", "μ•Œ 수 μ—†λŠ” μ‹œκ°„")),
498
+ "image_url": result.get("img", result.get("thumbnail", "")),
499
+ "translated_query": translated_query
500
+ })
501
+
502
+ return "", filtered_articles
503
  except Exception as e:
504
  return f"κ²°κ³Ό 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}", []
505