Kims12 commited on
Commit
0fedf9f
Β·
verified Β·
1 Parent(s): 6faa7b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +623 -188
app.py CHANGED
@@ -1,21 +1,29 @@
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
- import urllib.parse # iframe 경둜 보정을 μœ„ν•œ λͺ¨λ“ˆ
5
  import re
6
  import logging
7
  import tempfile
8
  import pandas as pd
9
- import mecab # python‑mecab‑ko 라이브러리 μ‚¬μš©
10
  import os
11
  import time
12
  import hmac
13
  import hashlib
14
  import base64
 
 
 
 
 
 
 
 
15
 
16
  # 디버깅(둜그)용 ν•¨μˆ˜
17
  def debug_log(message: str):
18
- print(f"[DEBUG] {message}")
19
 
20
  # --- 넀이버 λΈ”λ‘œκ·Έ μŠ€ν¬λž˜ν•‘ ---
21
  def scrape_naver_blog(url: str) -> str:
@@ -29,7 +37,7 @@ def scrape_naver_blog(url: str) -> str:
29
  )
30
  }
31
  try:
32
- response = requests.get(url, headers=headers)
33
  debug_log("HTTP GET μš”μ²­(메인 νŽ˜μ΄μ§€) μ™„λ£Œ")
34
  if response.status_code != 200:
35
  debug_log(f"μš”μ²­ μ‹€νŒ¨, μƒνƒœμ½”λ“œ: {response.status_code}")
@@ -39,63 +47,127 @@ def scrape_naver_blog(url: str) -> str:
39
  iframe = soup.select_one("iframe#mainFrame")
40
  if not iframe:
41
  debug_log("iframe#mainFrame νƒœκ·Έλ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.")
42
- return "λ³Έλ¬Έ iframe을 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
 
 
 
 
 
 
 
 
 
43
  iframe_src = iframe.get("src")
44
  if not iframe_src:
45
  debug_log("iframe srcκ°€ μ‘΄μž¬ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
46
  return "λ³Έλ¬Έ iframe의 srcλ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
47
- parsed_iframe_url = urllib.parse.urljoin(url, iframe_src)
 
 
 
 
 
 
 
 
 
 
 
48
  debug_log(f"iframe νŽ˜μ΄μ§€ μš”μ²­ URL: {parsed_iframe_url}")
49
- iframe_response = requests.get(parsed_iframe_url, headers=headers)
50
  debug_log("HTTP GET μš”μ²­(iframe νŽ˜μ΄μ§€) μ™„λ£Œ")
51
  if iframe_response.status_code != 200:
52
  debug_log(f"iframe μš”μ²­ μ‹€νŒ¨, μƒνƒœμ½”λ“œ: {iframe_response.status_code}")
53
  return f"iframeμ—μ„œ 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. μƒνƒœμ½”λ“œ: {iframe_response.status_code}"
54
  iframe_soup = BeautifulSoup(iframe_response.text, "html.parser")
55
  debug_log("HTML νŒŒμ‹±(iframe νŽ˜μ΄μ§€) μ™„λ£Œ")
56
- title_div = iframe_soup.select_one('.se-module.se-module-text.se-title-text')
57
- title = title_div.get_text(strip=True) if title_div else "제λͺ©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
 
 
 
 
 
 
 
 
 
 
 
 
58
  debug_log(f"μΆ”μΆœλœ 제λͺ©: {title}")
59
- content_div = iframe_soup.select_one('.se-main-container')
60
- if content_div:
61
- content = content_div.get_text("\n", strip=True)
62
- else:
63
- content = "본문을 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  debug_log("λ³Έλ¬Έ μΆ”μΆœ μ™„λ£Œ")
65
  result = f"[제λͺ©]\n{title}\n\n[λ³Έλ¬Έ]\n{content}"
66
  debug_log("제λͺ©κ³Ό λ³Έλ¬Έ ν•©μΉ¨ μ™„λ£Œ")
67
  return result
 
 
 
68
  except Exception as e:
69
- debug_log(f"μ—λŸ¬ λ°œμƒ: {str(e)}")
70
  return f"μŠ€ν¬λž˜ν•‘ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
71
 
72
  # --- ν˜•νƒœμ†Œ 뢄석 (μ°Έμ‘°μ½”λ“œ-1) ---
73
  def analyze_text(text: str):
74
- logging.basicConfig(level=logging.DEBUG)
75
  logger = logging.getLogger(__name__)
76
- logger.debug("원본 ν…μŠ€νŠΈ: %s", text)
77
- filtered_text = re.sub(r'[^κ°€-힣]', '', text)
78
- logger.debug("ν•„ν„°λ§λœ ν…μŠ€νŠΈ: %s", filtered_text)
79
- if not filtered_text:
80
- logger.debug("μœ νš¨ν•œ ν•œκ΅­μ–΄ ν…μŠ€νŠΈκ°€ μ—†μŒ.")
 
 
 
 
 
 
81
  return pd.DataFrame(columns=["단어", "λΉˆλ„μˆ˜"]), ""
82
- mecab_instance = mecab.MeCab()
83
- tokens = mecab_instance.pos(filtered_text)
84
- logger.debug("ν˜•νƒœμ†Œ 뢄석 κ²°κ³Ό: %s", tokens)
85
  freq = {}
86
  for word, pos in tokens:
87
- if word and word.strip() and pos.startswith("NN"):
 
88
  freq[word] = freq.get(word, 0) + 1
89
- logger.debug("단어: %s, ν’ˆμ‚¬: %s, λΉˆλ„: %d", word, pos, freq[word])
 
90
  sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
91
- logger.debug("μ •λ ¬λœ 단어 λΉˆλ„: %s", sorted_freq)
92
  df = pd.DataFrame(sorted_freq, columns=["단어", "λΉˆλ„μˆ˜"])
93
- logger.debug("ν˜•νƒœμ†Œ 뢄석 DataFrame 생성됨, shape: %s", df.shape)
94
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
95
- df.to_excel(temp_file.name, index=False, engine='openpyxl')
96
- temp_file.close()
97
- logger.debug("Excel 파일 생성됨: %s", temp_file.name)
98
- return df, temp_file.name
 
 
 
 
 
 
 
 
 
99
 
100
  # --- 넀이버 검색 및 κ΄‘κ³  API κ΄€λ ¨ (μ°Έμ‘°μ½”λ“œ-2) ---
101
  def generate_signature(timestamp, method, uri, secret_key):
@@ -114,189 +186,489 @@ def get_header(method, uri, api_key, secret_key, customer_id):
114
  "X-Signature": signature
115
  }
116
 
 
 
 
 
 
 
 
 
117
  def fetch_related_keywords(keyword):
118
- debug_log(f"fetch_related_keywords 호좜, ν‚€μ›Œλ“œ: {keyword}")
119
- API_KEY = os.environ["NAVER_API_KEY"]
120
- SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
121
- CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
 
 
 
 
 
122
  BASE_URL = "https://api.naver.com"
123
  uri = "/keywordstool"
124
  method = "GET"
125
- headers = get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
126
- params = {
127
- "hintKeywords": [keyword],
128
- "showDetail": "1"
129
- }
130
- response = requests.get(BASE_URL + uri, params=params, headers=headers)
131
- data = response.json()
132
- if "keywordList" not in data:
133
- return pd.DataFrame()
134
- df = pd.DataFrame(data["keywordList"])
135
- if len(df) > 100:
136
- df = df.head(100)
137
- def parse_count(x):
138
- try:
139
- return int(str(x).replace(",", ""))
140
- except:
141
- return 0
142
- df["PCμ›”κ²€μƒ‰λŸ‰"] = df["monthlyPcQcCnt"].apply(parse_count)
143
- df["λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰"] = df["monthlyMobileQcCnt"].apply(parse_count)
144
- df["ν† νƒˆμ›”κ²€μƒ‰λŸ‰"] = df["PCμ›”κ²€μƒ‰λŸ‰"] + df["λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰"]
145
- df.rename(columns={"relKeyword": "μ •λ³΄ν‚€μ›Œλ“œ"}, inplace=True)
146
- result_df = df[["μ •λ³΄ν‚€μ›Œλ“œ", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰"]]
147
- debug_log("fetch_related_keywords μ™„λ£Œ")
148
- return result_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  def fetch_blog_count(keyword):
151
  debug_log(f"fetch_blog_count 호좜, ν‚€μ›Œλ“œ: {keyword}")
152
- client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
153
- client_secret = os.environ["NAVER_SEARCH_CLIENT_SECRET"]
 
 
 
 
 
154
  url = "https://openapi.naver.com/v1/search/blog.json"
155
  headers = {
156
  "X-Naver-Client-Id": client_id,
157
  "X-Naver-Client-Secret": client_secret
158
  }
159
- params = {"query": keyword, "display": 1}
160
- response = requests.get(url, headers=headers, params=params)
161
- if response.status_code == 200:
 
 
162
  data = response.json()
163
- debug_log(f"fetch_blog_count κ²°κ³Ό: {data.get('total', 0)}")
164
- return data.get("total", 0)
165
- else:
166
- debug_log(f"fetch_blog_count 였λ₯˜, μƒνƒœμ½”λ“œ: {response.status_code}")
167
- return 0
 
 
 
 
 
168
 
169
  def create_excel_file(df):
170
- with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
171
- excel_path = tmp.name
172
- df.to_excel(excel_path, index=False)
173
- debug_log(f"Excel 파일 생성됨: {excel_path}")
174
- return excel_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
  def process_keyword(keywords: str, include_related: bool):
177
- debug_log(f"process_keyword 호좜, ν‚€μ›Œλ“œλ“€: {keywords}, 연관검색어 포함: {include_related}")
178
- input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
179
- result_dfs = []
180
- for idx, kw in enumerate(input_keywords):
181
- df_kw = fetch_related_keywords(kw)
182
- if df_kw.empty:
183
- continue
184
- row_kw = df_kw[df_kw["μ •λ³΄ν‚€μ›Œλ“œ"] == kw]
185
- if not row_kw.empty:
186
- result_dfs.append(row_kw)
187
- else:
188
- result_dfs.append(df_kw.head(1))
189
- if include_related and idx == 0:
190
- df_related = df_kw[df_kw["μ •λ³΄ν‚€μ›Œλ“œ"] != kw]
191
- if not df_related.empty:
192
- result_dfs.append(df_related)
193
- if result_dfs:
194
- result_df = pd.concat(result_dfs, ignore_index=True)
195
- result_df.drop_duplicates(subset=["μ •λ³΄ν‚€μ›Œλ“œ"], inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  else:
197
- result_df = pd.DataFrame(columns=["μ •λ³΄ν‚€μ›Œλ“œ", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰"])
198
- result_df["λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"] = result_df["μ •λ³΄ν‚€μ›Œλ“œ"].apply(fetch_blog_count)
199
  result_df.sort_values(by="ν† νƒˆμ›”κ²€μƒ‰λŸ‰", ascending=False, inplace=True)
200
- debug_log("process_keyword μ™„λ£Œ")
 
 
 
 
 
 
 
 
 
201
  return result_df, create_excel_file(result_df)
202
 
 
203
  # --- ν˜•νƒœμ†Œ 뢄석과 κ²€μƒ‰λŸ‰/λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜ 병합 ---
204
  def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
205
  debug_log("morphological_analysis_and_enrich ν•¨μˆ˜ μ‹œμž‘")
206
- df_freq, _ = analyze_text(text)
 
207
  if df_freq.empty:
208
  debug_log("ν˜•νƒœμ†Œ 뢄석 κ²°κ³Όκ°€ 빈 λ°μ΄ν„°ν”„λ ˆμž„μž…λ‹ˆλ‹€.")
209
- return df_freq, ""
 
210
  if remove_freq1:
211
- before_shape = df_freq.shape
212
- df_freq = df_freq[df_freq["λΉˆλ„μˆ˜"] != 1]
213
- debug_log(f"λΉˆλ„μˆ˜ 1 제거 적용됨. {before_shape} -> {df_freq.shape}")
214
- keywords = "\n".join(df_freq["단어"].tolist())
215
- debug_log(f"λΆ„μ„λœ ν‚€μ›Œλ“œ: {keywords}")
216
- df_keyword_info, _ = process_keyword(keywords, include_related=False)
217
- debug_log("κ²€μƒ‰λŸ‰ 및 λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜ 쑰회 μ™„λ£Œ")
218
- merged_df = pd.merge(df_freq, df_keyword_info, left_on="단어", right_on="μ •λ³΄ν‚€μ›Œλ“œ", how="left")
219
- merged_df.drop(columns=["μ •λ³΄ν‚€μ›Œλ“œ"], inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  merged_excel_path = create_excel_file(merged_df)
221
  debug_log("morphological_analysis_and_enrich ν•¨μˆ˜ μ™„λ£Œ")
222
  return merged_df, merged_excel_path
223
 
 
224
  # --- 직접 ν‚€μ›Œλ“œ 뢄석 (단독 뢄석) ---
225
  def direct_keyword_analysis(text: str, keyword_input: str):
226
  debug_log("direct_keyword_analysis ν•¨μˆ˜ μ‹œμž‘")
227
- keywords = re.split(r'[\n,]+', keyword_input)
228
- keywords = [kw.strip() for kw in keywords if kw.strip()]
229
- debug_log(f"μž…λ ₯된 ν‚€μ›Œλ“œ λͺ©λ‘: {keywords}")
230
- results = []
231
- for kw in keywords:
232
- count = text.count(kw)
233
- results.append((kw, count))
234
- debug_log(f"ν‚€μ›Œλ“œ '{kw}'의 λΉˆλ„μˆ˜: {count}")
235
- df = pd.DataFrame(results, columns=["ν‚€μ›Œλ“œ", "λΉˆλ„μˆ˜"])
236
- excel_path = create_excel_file(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  debug_log("direct_keyword_analysis ν•¨μˆ˜ μ™„λ£Œ")
238
- return df, excel_path
 
239
 
240
  # --- 톡합 뢄석 (ν˜•νƒœμ†Œ 뢄석 + 직접 ν‚€μ›Œλ“œ 뢄석) ---
241
  def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
242
  debug_log("combined_analysis ν•¨μˆ˜ μ‹œμž‘")
243
- merged_df, _ = morphological_analysis_and_enrich(blog_text, remove_freq1)
244
- if "μ§μ ‘μž…λ ₯" not in merged_df.columns:
245
- merged_df["μ§μ ‘μž…λ ₯"] = ""
246
- direct_keywords = re.split(r'[\n,]+', direct_keyword_input)
247
- direct_keywords = [kw.strip() for kw in direct_keywords if kw.strip()]
248
- debug_log(f"μž…λ ₯된 직접 ν‚€μ›Œλ“œ: {direct_keywords}")
249
- for dk in direct_keywords:
250
- if dk in merged_df["단어"].values:
251
- merged_df.loc[merged_df["단어"] == dk, "μ§μ ‘μž…λ ₯"] = "μ§μ ‘μž…λ ₯"
252
- else:
253
- freq = blog_text.count(dk)
254
- df_direct, _ = process_keyword(dk, include_related=False)
255
- if (not df_direct.empty) and (dk in df_direct["μ •λ³΄ν‚€μ›Œλ“œ"].values):
256
- row = df_direct[df_direct["μ •λ³΄ν‚€μ›Œλ“œ"] == dk].iloc[0]
257
- pc = row.get("PCμ›”κ²€μƒ‰λŸ‰", None)
258
- mobile = row.get("λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", None)
259
- total = row.get("ν† νƒˆμ›”κ²€μƒ‰λŸ‰", None)
260
- blog_count = row.get("λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜", None)
261
- else:
262
- pc = mobile = total = blog_count = None
263
- new_row = {
264
- "단어": dk,
265
- "λΉˆλ„μˆ˜": freq,
266
- "PCμ›”κ²€μƒ‰λŸ‰": pc,
267
- "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰": mobile,
268
- "ν† νƒˆμ›”κ²€μƒ‰λŸ‰": total,
269
- "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜": blog_count,
270
- "μ§μ ‘μž…λ ₯": "μ§μ ‘μž…λ ₯"
271
- }
272
- merged_df = pd.concat([merged_df, pd.DataFrame([new_row])], ignore_index=True)
273
- merged_df = merged_df.sort_values(by="λΉˆλ„μˆ˜", ascending=False).reset_index(drop=True)
274
- combined_excel = create_excel_file(merged_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  debug_log("combined_analysis ν•¨μˆ˜ μ™„λ£Œ")
276
- return merged_df, combined_excel
 
277
 
278
  # --- 뢄석 ν•Έλ“€λŸ¬ ---
279
  def analysis_handler(blog_text: str, remove_freq1: bool, direct_keyword_input: str, direct_keyword_only: bool):
280
- debug_log("analysis_handler ν•¨μˆ˜ μ‹œμž‘")
 
 
 
 
 
 
 
 
 
 
 
281
  if direct_keyword_only:
282
  # "직접 ν‚€μ›Œλ“œ μž…λ ₯만 뢄석" 선택 μ‹œ 단독 뢄석 μˆ˜ν–‰
283
- return direct_keyword_analysis(blog_text, direct_keyword_input)
 
 
 
 
 
 
284
  else:
285
  # κΈ°λ³Έ 톡합 뢄석 μˆ˜ν–‰
286
- return combined_analysis(blog_text, remove_freq1, direct_keyword_input)
 
 
 
 
 
287
 
288
  # --- μŠ€ν¬λž˜ν•‘ μ‹€ν–‰ ---
289
  def fetch_blog_content(url: str):
290
  debug_log("fetch_blog_content ν•¨μˆ˜ μ‹œμž‘")
 
 
 
 
 
 
291
  content = scrape_naver_blog(url)
292
- debug_log("fetch_blog_content ν•¨μˆ˜ μ™„λ£Œ")
 
293
  return content
294
 
295
  # --- Custom CSS ---
296
  custom_css = """
297
  /* 전체 μ»¨ν…Œμ΄λ„ˆ μŠ€νƒ€μΌ */
298
  .gradio-container {
299
- max-width: 960px;
300
  margin: auto;
301
  font-family: 'Helvetica Neue', Arial, sans-serif;
302
  background: #f5f7fa;
@@ -330,7 +702,12 @@ custom_css = """
330
  padding: 0.6rem 1.2rem;
331
  font-size: 1rem;
332
  cursor: pointer;
 
333
  }
 
 
 
 
334
 
335
  /* μ²΄ν¬λ°•μŠ€ μŠ€νƒ€μΌ */
336
  .custom-checkbox {
@@ -351,39 +728,97 @@ custom_css = """
351
  """
352
 
353
  # --- Gradio μΈν„°νŽ˜μ΄μŠ€ ꡬ성 ---
354
- with gr.Blocks(title="넀이버 λΈ”λ‘œκ·Έ ν˜•νƒœμ†Œ 뢄석 μ„œλΉ„μŠ€", css=custom_css) as demo:
355
- gr.HTML("<div class='custom-header'>넀이버 λΈ”λ‘œκ·Έ ν˜•νƒœμ†Œ 뢄석 μ„œλΉ„μŠ€</div>")
356
- # λΈ”λ‘œκ·Έ 링크와 μŠ€ν¬λž˜ν•‘ μ‹€ν–‰ λ²„νŠΌμ„ ν•œ κ·Έλ£Ή 내에 배치 (λ²„νŠΌμ€ κ°€μš΄λ° μ •λ ¬)
357
- with gr.Group(elem_classes="custom-group"):
358
- with gr.Row():
359
- blog_url_input = gr.Textbox(label="넀이버 λΈ”λ‘œκ·Έ 링크", placeholder="예: https://blog.naver.com/ssboost/222983068507", lines=1)
360
- with gr.Row(elem_classes="centered"):
361
- scrape_button = gr.Button("μŠ€ν¬λž˜ν•‘ μ‹€ν–‰", elem_classes="custom-button")
362
- with gr.Group(elem_classes="custom-group"):
363
- blog_content_box = gr.Textbox(label="λΈ”λ‘œκ·Έ λ‚΄μš© (μˆ˜μ • κ°€λŠ₯)", lines=10, placeholder="μŠ€ν¬λž˜ν•‘λœ λΈ”λ‘œκ·Έ λ‚΄μš©μ΄ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€.")
364
- with gr.Group(elem_classes="custom-group"):
365
- with gr.Row():
366
- remove_freq_checkbox = gr.Checkbox(label="λΉˆλ„μˆ˜1 제거", value=True, elem_classes="custom-checkbox")
367
- with gr.Row():
368
- # "λΉˆλ„μˆ˜1 제거" μ•„λž˜μ— "직접 ν‚€μ›Œλ“œ μž…λ ₯만 뢄석" μ²΄ν¬λ°•μŠ€ 배치
369
- direct_keyword_only_checkbox = gr.Checkbox(label="직접 ν‚€μ›Œλ“œ μž…λ ₯만 뢄석", value=False, elem_classes="custom-checkbox")
370
- with gr.Row():
371
- direct_keyword_box = gr.Textbox(label="직접 ν‚€μ›Œλ“œ μž…λ ₯ (μ—”ν„° λ˜λŠ” ','둜 ꡬ뢄)", lines=2, placeholder="예: ν‚€μ›Œλ“œ1, ν‚€μ›Œλ“œ2\nν‚€μ›Œλ“œ3")
372
- with gr.Group(elem_classes="custom-group"):
373
- with gr.Row(elem_classes="centered"):
374
- analyze_button = gr.Button("뢄석 μ‹€ν–‰", elem_classes="custom-button")
375
- with gr.Group(elem_classes="custom-group custom-result"):
376
- result_df = gr.Dataframe(label="톡합 뢄석 κ²°κ³Ό (단어, λΉˆλ„μˆ˜, κ²€μƒ‰λŸ‰, λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜, μ§μ ‘μž…λ ₯)", interactive=True)
377
- with gr.Group(elem_classes="custom-group"):
378
- excel_file = gr.File(label="Excel λ‹€μš΄λ‘œλ“œ")
379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  # 이벀트 μ—°κ²°
381
  scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
382
- analyze_button.click(fn=analysis_handler,
383
- inputs=[blog_content_box, remove_freq_checkbox, direct_keyword_box, direct_keyword_only_checkbox],
384
- outputs=[result_df, excel_file])
 
 
385
 
386
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
  debug_log("Gradio μ•± μ‹€ν–‰ μ‹œμž‘")
388
- demo.launch()
389
- debug_log("Gradio μ•± μ‹€ν–‰ μ’…λ£Œ")
 
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
+ import urllib.parse # iframe 경둜 보정을 μœ„ν•œ λͺ¨λ“ˆ
5
  import re
6
  import logging
7
  import tempfile
8
  import pandas as pd
9
+ import mecab # python-mecab-ko 라이브러리 μ‚¬μš©
10
  import os
11
  import time
12
  import hmac
13
  import hashlib
14
  import base64
15
+ from concurrent.futures import ThreadPoolExecutor, as_completed
16
+
17
+ # --- 병렬 처리 μ„€μ • ---
18
+ # API 호좜 μ œν•œμ— 맞좰 적절히 μ‘°μ ˆν•˜μ„Έμš”.
19
+ # λ„ˆλ¬΄ 높은 값은 API μ œν•œμ— 걸릴 수 μžˆμŠ΅λ‹ˆλ‹€.
20
+ MAX_WORKERS_RELATED_KEYWORDS = 5 # fetch_related_keywords 병렬 μž‘μ—…μž 수
21
+ MAX_WORKERS_BLOG_COUNT = 10 # fetch_blog_count 병렬 μž‘μ—…μž 수
22
+
23
 
24
  # 디버깅(둜그)용 ν•¨μˆ˜
25
  def debug_log(message: str):
26
+ print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] [DEBUG] {message}")
27
 
28
  # --- 넀이버 λΈ”λ‘œκ·Έ μŠ€ν¬λž˜ν•‘ ---
29
  def scrape_naver_blog(url: str) -> str:
 
37
  )
38
  }
39
  try:
40
+ response = requests.get(url, headers=headers, timeout=10)
41
  debug_log("HTTP GET μš”μ²­(메인 νŽ˜μ΄μ§€) μ™„λ£Œ")
42
  if response.status_code != 200:
43
  debug_log(f"μš”μ²­ μ‹€νŒ¨, μƒνƒœμ½”λ“œ: {response.status_code}")
 
47
  iframe = soup.select_one("iframe#mainFrame")
48
  if not iframe:
49
  debug_log("iframe#mainFrame νƒœκ·Έλ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.")
50
+ # 일뢀 λΈ”λ‘œκ·ΈλŠ” mainFrame이 없을 수 있음. λ³Έλ¬Έ 직접 μ‹œλ„
51
+ content_div_direct = soup.select_one('.se-main-container')
52
+ if content_div_direct:
53
+ title_div_direct = soup.select_one('.se-module.se-module-text.se-title-text')
54
+ title = title_div_direct.get_text(strip=True) if title_div_direct else "제λͺ©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
55
+ content = content_div_direct.get_text("\n", strip=True)
56
+ debug_log("iframe 없이 λ³Έλ¬Έ 직접 μΆ”μΆœ μ™„λ£Œ")
57
+ return f"[제λͺ©]\n{title}\n\n[λ³Έλ¬Έ]\n{content}"
58
+ return "λ³Έλ¬Έ iframe을 찾을 수 μ—†μŠ΅λ‹ˆλ‹€. (λ³Έλ¬Έ 직접 μΆ”μΆœ μ‹€νŒ¨)"
59
+
60
  iframe_src = iframe.get("src")
61
  if not iframe_src:
62
  debug_log("iframe srcκ°€ μ‘΄μž¬ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
63
  return "λ³Έλ¬Έ iframe의 srcλ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
64
+
65
+ # iframe_srcκ°€ μ ˆλŒ€ URL이 μ•„λ‹Œ 경우λ₯Ό λŒ€λΉ„
66
+ if iframe_src.startswith("//"):
67
+ parsed_iframe_url = "https:" + iframe_src
68
+ elif iframe_src.startswith("/"):
69
+ parsed_main_url = urllib.parse.urlparse(url)
70
+ parsed_iframe_url = urllib.parse.urlunparse(
71
+ (parsed_main_url.scheme, parsed_main_url.netloc, iframe_src, None, None, None)
72
+ )
73
+ else:
74
+ parsed_iframe_url = urllib.parse.urljoin(url, iframe_src)
75
+
76
  debug_log(f"iframe νŽ˜μ΄μ§€ μš”μ²­ URL: {parsed_iframe_url}")
77
+ iframe_response = requests.get(parsed_iframe_url, headers=headers, timeout=10)
78
  debug_log("HTTP GET μš”μ²­(iframe νŽ˜μ΄μ§€) μ™„λ£Œ")
79
  if iframe_response.status_code != 200:
80
  debug_log(f"iframe μš”μ²­ μ‹€νŒ¨, μƒνƒœμ½”λ“œ: {iframe_response.status_code}")
81
  return f"iframeμ—μ„œ 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. μƒνƒœμ½”λ“œ: {iframe_response.status_code}"
82
  iframe_soup = BeautifulSoup(iframe_response.text, "html.parser")
83
  debug_log("HTML νŒŒμ‹±(iframe νŽ˜μ΄μ§€) μ™„λ£Œ")
84
+
85
+ # 제λͺ© μΆ”μΆœ (λ‹€μ–‘ν•œ ꡬ쑰 μ‹œλ„)
86
+ title_selectors = [
87
+ '.se-module.se-module-text.se-title-text', # 일반적인 μŠ€λ§ˆνŠΈμ—λ””ν„° ONE
88
+ '.title_text', # ꡬ버전 에디터 λ˜λŠ” λ‹€λ₯Έ ꡬ쑰
89
+ 'div[class*="title"] h3',
90
+ 'h1', 'h2', 'h3' # 일반적인 제λͺ© νƒœκ·Έ
91
+ ]
92
+ title = "제λͺ©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
93
+ for selector in title_selectors:
94
+ title_div = iframe_soup.select_one(selector)
95
+ if title_div:
96
+ title = title_div.get_text(strip=True)
97
+ break
98
  debug_log(f"μΆ”μΆœλœ 제λͺ©: {title}")
99
+
100
+ # λ³Έλ¬Έ μΆ”μΆœ (λ‹€μ–‘ν•œ ꡬ쑰 μ‹œλ„)
101
+ content_selectors = [
102
+ '.se-main-container', # μŠ€λ§ˆνŠΈμ—λ””ν„° ONE
103
+ 'div#content', # ꡬ버전 에디터
104
+ 'div.post_ct', # 일뢀 λΈ”λ‘œκ·Έ ꡬ쑰
105
+ 'article', 'main' # μ‹œλ§¨ν‹± νƒœκ·Έ
106
+ ]
107
+ content = "본문을 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
108
+ for selector in content_selectors:
109
+ content_div = iframe_soup.select_one(selector)
110
+ if content_div:
111
+ # λΆˆν•„μš”ν•œ 슀크립트, μŠ€νƒ€μΌ νƒœκ·Έ 제거
112
+ for s in content_div(['script', 'style']):
113
+ s.decompose()
114
+ content = content_div.get_text("\n", strip=True)
115
+ break
116
+
117
  debug_log("λ³Έλ¬Έ μΆ”μΆœ μ™„λ£Œ")
118
  result = f"[제λͺ©]\n{title}\n\n[λ³Έλ¬Έ]\n{content}"
119
  debug_log("제λͺ©κ³Ό λ³Έλ¬Έ ν•©μΉ¨ μ™„λ£Œ")
120
  return result
121
+ except requests.exceptions.Timeout:
122
+ debug_log(f"μš”μ²­ μ‹œκ°„ 초과: {url}")
123
+ return f"μŠ€ν¬λž˜ν•‘ 쀑 μ‹œκ°„ μ΄ˆκ³Όκ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {url}"
124
  except Exception as e:
125
+ debug_log(f"μŠ€ν¬λž˜ν•‘ μ—λŸ¬ λ°œμƒ: {str(e)}")
126
  return f"μŠ€ν¬λž˜ν•‘ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
127
 
128
  # --- ν˜•νƒœμ†Œ 뢄석 (μ°Έμ‘°μ½”λ“œ-1) ---
129
  def analyze_text(text: str):
130
+ logging.basicConfig(level=logging.INFO) # INFO 레벨둜 λ³€κ²½ν•˜μ—¬ λ„ˆλ¬΄ λ§Žμ€ 둜그 λ°©μ§€
131
  logger = logging.getLogger(__name__)
132
+ # logger.debug("원본 ν…μŠ€νŠΈ: %s", text) # λ„ˆλ¬΄ κΈΈ 수 μžˆμœΌλ―€λ‘œ 주석 처리
133
+ filtered_text = re.sub(r'[^κ°€-힣a-zA-Z0-9\s]', '', text) # μ˜μ–΄, 숫자, 곡백 포함
134
+ # logger.debug("ν•„ν„°λ§λœ ν…μŠ€νŠΈ: %s", filtered_text)
135
+ if not filtered_text.strip():
136
+ logger.info("μœ νš¨ν•œ ν…μŠ€νŠΈκ°€ μ—†μŒ (필터링 ν›„).")
137
+ return pd.DataFrame(columns=["단어", "λΉˆλ„μˆ˜"]), ""
138
+ try:
139
+ mecab_instance = mecab.MeCab()
140
+ tokens = mecab_instance.pos(filtered_text)
141
+ except Exception as e:
142
+ logger.error(f"MeCab ν˜•νƒœμ†Œ 뢄석 쀑 였λ₯˜: {e}")
143
  return pd.DataFrame(columns=["단어", "λΉˆλ„μˆ˜"]), ""
144
+
145
+ # logger.debug("ν˜•νƒœμ†Œ 뢄석 κ²°κ³Ό: %s", tokens)
 
146
  freq = {}
147
  for word, pos in tokens:
148
+ # 일반λͺ…사(NNG), 고유λͺ…사(NNP), μ™Έκ΅­μ–΄(SL), 숫자(SN) λ“± 포함, ν•œ κΈ€μž λ‹¨μ–΄λŠ” μ œμ™Έ (선택 사항)
149
+ if word and word.strip() and (pos.startswith("NN") or pos in ["SL", "SH"]) and len(word) > 1 :
150
  freq[word] = freq.get(word, 0) + 1
151
+ # logger.debug("단어: %s, ν’ˆμ‚¬: %s, λΉˆλ„: %d", word, pos, freq[word])
152
+
153
  sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
154
+ # logger.debug("μ •λ ¬λœ 단어 λΉˆλ„: %s", sorted_freq)
155
  df = pd.DataFrame(sorted_freq, columns=["단어", "λΉˆλ„μˆ˜"])
156
+ logger.info(f"ν˜•νƒœμ†Œ 뢄석 DataFrame 생성됨, shape: {df.shape}")
157
+
158
+ temp_file_path = ""
159
+ if not df.empty:
160
+ try:
161
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", mode='w+b') as temp_file:
162
+ df.to_excel(temp_file.name, index=False, engine='openpyxl')
163
+ temp_file_path = temp_file.name
164
+ logger.info(f"Excel 파일 생성됨: {temp_file_path}")
165
+ except Exception as e:
166
+ logger.error(f"Excel 파일 μ €μž₯ 쀑 였λ₯˜: {e}")
167
+ temp_file_path = "" # 였λ₯˜ λ°œμƒ μ‹œ 경둜 μ΄ˆκΈ°ν™”
168
+
169
+ return df, temp_file_path
170
+
171
 
172
  # --- 넀이버 검색 및 κ΄‘κ³  API κ΄€λ ¨ (μ°Έμ‘°μ½”λ“œ-2) ---
173
  def generate_signature(timestamp, method, uri, secret_key):
 
186
  "X-Signature": signature
187
  }
188
 
189
+ # API ν‚€ ν™˜κ²½ λ³€μˆ˜ 확인 ν•¨μˆ˜
190
+ def get_env_variable(var_name):
191
+ value = os.environ.get(var_name)
192
+ if value is None:
193
+ debug_log(f"ν™˜κ²½ λ³€μˆ˜ '{var_name}'κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. API 호좜이 μ‹€νŒ¨ν•  수 μžˆμŠ΅λ‹ˆλ‹€.")
194
+ # ν•„μš”μ‹œ μ—¬κΈ°μ„œ raise Exception λ˜λŠ” 기본값 λ°˜ν™˜
195
+ return value
196
+
197
  def fetch_related_keywords(keyword):
198
+ debug_log(f"fetch_related_keywords 호좜 μ‹œμž‘, ν‚€μ›Œλ“œ: {keyword}")
199
+ API_KEY = get_env_variable("NAVER_API_KEY")
200
+ SECRET_KEY = get_env_variable("NAVER_SECRET_KEY")
201
+ CUSTOMER_ID = get_env_variable("NAVER_CUSTOMER_ID")
202
+
203
+ if not all([API_KEY, SECRET_KEY, CUSTOMER_ID]):
204
+ debug_log(f"넀이버 κ΄‘κ³  API ν‚€ 정보 λΆ€μ‘±μœΌλ‘œ '{keyword}' μ—°κ΄€ ν‚€μ›Œλ“œ 쑰회λ₯Ό κ±΄λ„ˆλœλ‹ˆλ‹€.")
205
+ return pd.DataFrame()
206
+
207
  BASE_URL = "https://api.naver.com"
208
  uri = "/keywordstool"
209
  method = "GET"
210
+
211
+ try:
212
+ headers = get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
213
+ params = {
214
+ "hintKeywords": keyword, # 단일 ν‚€μ›Œλ“œ λ¬Έμžμ—΄λ‘œ 전달
215
+ "showDetail": "1"
216
+ }
217
+ # hintKeywordsλŠ” 리슀트둜 받을 수 μžˆμœΌλ‚˜, μ—¬κΈ°μ„œλŠ” 단일 ν‚€μ›Œλ“œ 처리λ₯Ό κ°€μ •ν•˜κ³  λ¬Έμžμ—΄λ‘œ 전달
218
+ # λ§Œμ•½ APIκ°€ hintKeywordsλ₯Ό 리슀트둜만 λ°›λŠ”λ‹€λ©΄ [keyword]둜 μˆ˜μ • ν•„μš”
219
+
220
+ response = requests.get(BASE_URL + uri, params=params, headers=headers, timeout=10)
221
+ response.raise_for_status() # 였λ₯˜ λ°œμƒ μ‹œ μ˜ˆμ™Έ λ°œμƒ
222
+ data = response.json()
223
+
224
+ if "keywordList" not in data or not data["keywordList"]:
225
+ debug_log(f"'{keyword}'에 λŒ€ν•œ μ—°κ΄€ ν‚€μ›Œλ“œ κ²°κ³Ό μ—†μŒ.")
226
+ return pd.DataFrame() # 빈 DataFrame λ°˜ν™˜
227
+
228
+ df = pd.DataFrame(data["keywordList"])
229
+
230
+ # API 응닡에 ν•΄λ‹Ή 컬럼이 없을 경우λ₯Ό λŒ€λΉ„
231
+ df["monthlyPcQcCnt"] = df.get("monthlyPcQcCnt", 0)
232
+ df["monthlyMobileQcCnt"] = df.get("monthlyMobileQcCnt", 0)
233
+
234
+ def parse_count(x):
235
+ if pd.isna(x) or str(x).lower() == '< 10': # 넀이버 APIλŠ” 10 미만일 λ•Œ "< 10"으둜 λ°˜ν™˜
236
+ return 5 # λ˜λŠ” 0, λ˜λŠ” λ‹€λ₯Έ λŒ€ν‘œκ°’ (예: 5)
237
+ try:
238
+ return int(str(x).replace(",", ""))
239
+ except ValueError:
240
+ return 0
241
+
242
+ df["PCμ›”κ²€μƒ‰λŸ‰"] = df["monthlyPcQcCnt"].apply(parse_count)
243
+ df["λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰"] = df["monthlyMobileQcCnt"].apply(parse_count)
244
+ df["ν† νƒˆμ›”κ²€μƒ‰λŸ‰"] = df["PCμ›”κ²€μƒ‰λŸ‰"] + df["λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰"]
245
+ df.rename(columns={"relKeyword": "μ •λ³΄ν‚€μ›Œλ“œ"}, inplace=True)
246
+
247
+ # ν•„μš”ν•œ 컬럼만 선택, μ—†λŠ” 경우 λŒ€λΉ„
248
+ required_cols = ["μ •λ³΄ν‚€μ›Œλ“œ", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰"]
249
+ result_df = pd.DataFrame(columns=required_cols)
250
+ for col in required_cols:
251
+ if col in df.columns:
252
+ result_df[col] = df[col]
253
+ else: # ν•΄λ‹Ή 컬럼이 API 응닡에 없을 경우 κΈ°λ³Έκ°’μœΌλ‘œ 채움
254
+ if col == "μ •λ³΄ν‚€μ›Œλ“œ": # μ •λ³΄ν‚€μ›Œλ“œλŠ” ν•„μˆ˜
255
+ debug_log(f"API 응닡에 'relKeyword'κ°€ μ—†μŠ΅λ‹ˆλ‹€. '{keyword}' 처리 쀑단.")
256
+ return pd.DataFrame()
257
+ result_df[col] = 0
258
+
259
+ debug_log(f"fetch_related_keywords '{keyword}' μ™„λ£Œ, κ²°κ³Ό {len(result_df)}개")
260
+ return result_df.head(100) # μ΅œλŒ€ 100개둜 μ œν•œ
261
+
262
+ except requests.exceptions.HTTPError as http_err:
263
+ debug_log(f"HTTP 였λ₯˜ λ°œμƒ (fetch_related_keywords for '{keyword}'): {http_err} - 응닡: {response.text if 'response' in locals() else 'N/A'}")
264
+ except requests.exceptions.RequestException as req_err:
265
+ debug_log(f"μš”μ²­ 였λ₯˜ λ°œμƒ (fetch_related_keywords for '{keyword}'): {req_err}")
266
+ except Exception as e:
267
+ debug_log(f"μ•Œ 수 μ—†λŠ” 였λ₯˜ λ°œμƒ (fetch_related_keywords for '{keyword}'): {e}")
268
+ return pd.DataFrame() # 였λ₯˜ λ°œμƒ μ‹œ 빈 DataFrame λ°˜ν™˜
269
+
270
 
271
  def fetch_blog_count(keyword):
272
  debug_log(f"fetch_blog_count 호좜, ν‚€μ›Œλ“œ: {keyword}")
273
+ client_id = get_env_variable("NAVER_SEARCH_CLIENT_ID")
274
+ client_secret = get_env_variable("NAVER_SEARCH_CLIENT_SECRET")
275
+
276
+ if not client_id or not client_secret:
277
+ debug_log(f"넀이버 검색 API ν‚€ 정보 λΆ€μ‘±μœΌλ‘œ '{keyword}' λΈ”λ‘œκ·Έ 수 쑰회λ₯Ό κ±΄λ„ˆλœλ‹ˆλ‹€.")
278
+ return 0
279
+
280
  url = "https://openapi.naver.com/v1/search/blog.json"
281
  headers = {
282
  "X-Naver-Client-Id": client_id,
283
  "X-Naver-Client-Secret": client_secret
284
  }
285
+ params = {"query": keyword, "display": 1} # display=1둜 μ„€μ •ν•˜μ—¬ total κ°’λ§Œ λΉ λ₯΄κ²Œ 확인
286
+
287
+ try:
288
+ response = requests.get(url, headers=headers, params=params, timeout=5)
289
+ response.raise_for_status() # HTTP 였λ₯˜ λ°œμƒ μ‹œ μ˜ˆμ™Έ λ°œμƒ
290
  data = response.json()
291
+ total_count = data.get("total", 0)
292
+ debug_log(f"fetch_blog_count κ²°κ³Ό: {total_count} for '{keyword}'")
293
+ return total_count
294
+ except requests.exceptions.HTTPError as http_err:
295
+ debug_log(f"HTTP 였λ₯˜ λ°œμƒ (fetch_blog_count for '{keyword}'): {http_err} - 응닡: {response.text}")
296
+ except requests.exceptions.RequestException as req_err: # Timeout, ConnectionError λ“±
297
+ debug_log(f"μš”μ²­ 였λ₯˜ λ°œμƒ (fetch_blog_count for '{keyword}'): {req_err}")
298
+ except Exception as e: # JSONDecodeError λ“± 기타 μ˜ˆμ™Έ
299
+ debug_log(f"μ•Œ 수 μ—†λŠ” 였λ₯˜ λ°œμƒ (fetch_blog_count for '{keyword}'): {e}")
300
+ return 0 # 였λ₯˜ λ°œμƒ μ‹œ 0 λ°˜ν™˜
301
 
302
  def create_excel_file(df):
303
+ if df.empty:
304
+ debug_log("빈 DataFrame으둜 Excel νŒŒμΌμ„ μƒμ„±ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
305
+ # 빈 νŒŒμΌμ„ μƒμ„±ν•˜κ±°λ‚˜, None을 λ°˜ν™˜ν•˜μ—¬ Gradioμ—μ„œ μ²˜λ¦¬ν•˜λ„λ‘ ν•  수 있음
306
+ # μ—¬κΈ°μ„œλŠ” 빈 μž„μ‹œ νŒŒμΌμ„ μƒμ„±ν•˜μ—¬ λ°˜ν™˜ (Gradio File μ»΄ν¬λ„ŒνŠΈκ°€ 경둜λ₯Ό κΈ°λŒ€ν•˜λ―€λ‘œ)
307
+ with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
308
+ excel_path = tmp.name
309
+ # 빈 μ—‘μ…€ νŒŒμΌμ— ν—€λ”λ§Œμ΄λΌλ„ 써주렀면
310
+ # pd.DataFrame(columns=df.columns).to_excel(excel_path, index=False)
311
+ # μ•„λ‹ˆλ©΄ κ·Έλƒ₯ 빈 νŒŒμΌμ„ λ°˜ν™˜
312
+ return excel_path
313
+
314
+ try:
315
+ with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False, mode='w+b') as tmp:
316
+ excel_path = tmp.name
317
+ df.to_excel(excel_path, index=False, engine='openpyxl')
318
+ debug_log(f"Excel 파일 생성됨: {excel_path}")
319
+ return excel_path
320
+ except Exception as e:
321
+ debug_log(f"Excel 파일 생성 쀑 였λ₯˜: {e}")
322
+ # 였λ₯˜ λ°œμƒ μ‹œ 빈 파일 κ²½λ‘œλΌλ„ λ°˜ν™˜ (Gradio ν˜Έν™˜μ„±)
323
+ with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
324
+ return tmp.name
325
+
326
 
327
  def process_keyword(keywords: str, include_related: bool):
328
+ debug_log(f"process_keyword 호좜 μ‹œμž‘, ν‚€μ›Œλ“œλ“€: '{keywords[:100]}...', 연관검색어 포함: {include_related}")
329
+ input_keywords_orig = [k.strip() for k in keywords.splitlines() if k.strip()]
330
+
331
+ if not input_keywords_orig:
332
+ debug_log("μž…λ ₯된 ν‚€μ›Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
333
+ return pd.DataFrame(columns=["μ •λ³΄ν‚€μ›Œλ“œ", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"]), ""
334
+
335
+ all_related_keywords_dfs = []
336
+
337
+ # 1. fetch_related_keywords 병렬 처리
338
+ debug_log(f"μ—°κ΄€ ν‚€μ›Œλ“œ 쑰회 병렬 처리 μ‹œμž‘ (μ΅œλŒ€ μž‘μ—…μž 수: {MAX_WORKERS_RELATED_KEYWORDS})")
339
+ with ThreadPoolExecutor(max_workers=MAX_WORKERS_RELATED_KEYWORDS) as executor:
340
+ future_to_keyword_related = {
341
+ executor.submit(fetch_related_keywords, kw): kw for kw in input_keywords_orig
342
+ }
343
+ for i, future in enumerate(as_completed(future_to_keyword_related)):
344
+ kw = future_to_keyword_related[future]
345
+ try:
346
+ df_kw_related = future.result() # DataFrame λ°˜ν™˜
347
+ if not df_kw_related.empty:
348
+ # 원본 ν‚€μ›Œλ“œκ°€ 결과에 ν¬ν•¨λ˜μ–΄ μžˆλŠ”μ§€ ν™•μΈν•˜κ³ , μ—†μœΌλ©΄ μΆ”κ°€ μ‹œλ„ (APIκ°€ 항상 relKeyword둜 μžμ‹ μ„ μ£Όμ§„ μ•ŠμŒ)
349
+ # ν•˜μ§€λ§Œ fetch_related_keywordsμ—μ„œ 이미 hintKeywordλ₯Ό 기반으둜 κ²€μƒ‰ν•˜λ―€λ‘œ,
350
+ # μΌλ°˜μ μœΌλ‘œλŠ” ν•΄λ‹Ή ν‚€μ›Œλ“œ 정보가 μžˆκ±°λ‚˜, μ—°κ΄€ ν‚€μ›Œλ“œλ§Œ λ‚˜μ˜΄.
351
+ # μ—¬κΈ°μ„œλŠ” API 응닡을 κ·ΈλŒ€λ‘œ ν™œμš©.
352
+
353
+ # 첫 번째 μž…λ ₯ ν‚€μ›Œλ“œμ΄κ³ , μ—°κ΄€ ν‚€μ›Œλ“œ 포함 μ˜΅μ…˜μ΄ 켜져 있으면 λͺ¨λ“  μ—°κ΄€ ν‚€μ›Œλ“œλ₯Ό μΆ”κ°€
354
+ # κ·Έ μ™Έμ˜ κ²½μš°μ—λŠ” ν•΄λ‹Ή ν‚€μ›Œλ“œ 자체의 μ •λ³΄λ§Œ (μžˆλ‹€λ©΄) μ‚¬μš©ν•˜κ±°λ‚˜, μ΅œμƒλ‹¨ ν‚€μ›Œλ“œ μ‚¬μš©
355
+ if include_related and kw == input_keywords_orig[0]:
356
+ all_related_keywords_dfs.append(df_kw_related)
357
+ debug_log(f"첫 번째 ν‚€μ›Œλ“œ '{kw}'의 λͺ¨λ“  μ—°κ΄€ ν‚€μ›Œλ“œ ({len(df_kw_related)}개) 좔가됨.")
358
+ else:
359
+ # ν•΄λ‹Ή ν‚€μ›Œλ“œμ™€ μΌμΉ˜ν•˜λŠ” 행을 μ°Ύκ±°λ‚˜, μ—†μœΌλ©΄ APIκ°€ λ°˜ν™˜ν•œ 첫번째 행을 μ‚¬μš©
360
+ row_kw = df_kw_related[df_kw_related["μ •λ³΄ν‚€μ›Œλ“œ"] == kw]
361
+ if not row_kw.empty:
362
+ all_related_keywords_dfs.append(row_kw)
363
+ debug_log(f"ν‚€μ›Œλ“œ '{kw}'의 직접 정보 좔가됨.")
364
+ elif not df_kw_related.empty : # 직접 μ •λ³΄λŠ” μ—†μ§€λ§Œ μ—°κ΄€ ν‚€μ›Œλ“œλŠ” μžˆμ„ λ•Œ
365
+ all_related_keywords_dfs.append(df_kw_related.head(1)) # κ°€μž₯ μ—°κ΄€μ„± 높은 ν‚€μ›Œλ“œ μΆ”κ°€
366
+ debug_log(f"ν‚€μ›Œλ“œ '{kw}'의 직접 μ •λ³΄λŠ” μ—†μœΌλ‚˜, κ°€μž₯ μ—°κ΄€μ„± 높은 ν‚€μ›Œλ“œ 1개 좔가됨.")
367
+ # else: ν‚€μ›Œλ“œ 정보도, μ—°κ΄€ 정보도 없을 λ•Œ (df_kw_relatedκ°€ λΉ„μ–΄μžˆμŒ)
368
+
369
+ debug_log(f"'{kw}' μ—°κ΄€ ν‚€μ›Œλ“œ 처리 μ™„λ£Œ ({i+1}/{len(input_keywords_orig)})")
370
+ except Exception as e:
371
+ debug_log(f"'{kw}' μ—°κ΄€ ν‚€μ›Œλ“œ 쑰회 쀑 병렬 μž‘μ—… 였λ₯˜: {e}")
372
+
373
+ if not all_related_keywords_dfs:
374
+ debug_log("μ—°κ΄€ ν‚€μ›Œλ“œ 쑰회 κ²°κ³Όκ°€ λͺ¨λ‘ λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.")
375
+ # 빈 DataFrame에 λΈ”λ‘œκ·Έ λ¬Έμ„œμˆ˜ 컬럼 μΆ”κ°€
376
+ empty_df = pd.DataFrame(columns=["μ •λ³΄ν‚€μ›Œλ“œ", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰"])
377
+ empty_df["λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"] = None
378
+ return empty_df, create_excel_file(empty_df)
379
+
380
+ result_df = pd.concat(all_related_keywords_dfs, ignore_index=True)
381
+ result_df.drop_duplicates(subset=["μ •λ³΄ν‚€μ›Œλ“œ"], inplace=True) # 쀑볡 제거
382
+ debug_log(f"μ—°κ΄€ ν‚€μ›Œλ“œ 병렬 처리 μ™„λ£Œ. ν†΅ν•©λœ DataFrame shape: {result_df.shape}")
383
+
384
+ # 2. fetch_blog_count 병렬 처리
385
+ keywords_for_blog_count = result_df["μ •λ³΄ν‚€μ›Œλ“œ"].dropna().unique().tolist()
386
+ blog_counts_map = {}
387
+
388
+ if keywords_for_blog_count:
389
+ debug_log(f"λΈ”λ‘œκ·Έ λ¬Έμ„œ 수 쑰회 병렬 처리 μ‹œμž‘ (ν‚€μ›Œλ“œ {len(keywords_for_blog_count)}개, μ΅œλŒ€ μž‘μ—…μž 수: {MAX_WORKERS_BLOG_COUNT})")
390
+ with ThreadPoolExecutor(max_workers=MAX_WORKERS_BLOG_COUNT) as executor:
391
+ future_to_keyword_blog = {
392
+ executor.submit(fetch_blog_count, kw): kw for kw in keywords_for_blog_count
393
+ }
394
+ for i, future in enumerate(as_completed(future_to_keyword_blog)):
395
+ kw = future_to_keyword_blog[future]
396
+ try:
397
+ count = future.result() # 숫자 λ°˜ν™˜
398
+ blog_counts_map[kw] = count
399
+ if (i+1) % 50 == 0: # λ„ˆλ¬΄ λ§Žμ€ 둜그 λ°©μ§€
400
+ debug_log(f"λΈ”λ‘œκ·Έ 수 쑰회 μ§„ν–‰ 쀑... ({i+1}/{len(keywords_for_blog_count)})")
401
+ except Exception as e:
402
+ debug_log(f"'{kw}' λΈ”λ‘œκ·Έ 수 쑰회 쀑 병렬 μž‘μ—… 였λ₯˜: {e}")
403
+ blog_counts_map[kw] = 0 # 였λ₯˜ μ‹œ 0으둜 처리
404
+
405
+ result_df["λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"] = result_df["μ •λ³΄ν‚€μ›Œλ“œ"].map(blog_counts_map).fillna(0).astype(int)
406
+ debug_log("λΈ”λ‘œκ·Έ λ¬Έμ„œ 수 병렬 처리 μ™„λ£Œ.")
407
  else:
408
+ result_df["λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"] = 0 # μ‘°νšŒν•  ν‚€μ›Œλ“œκ°€ μ—†μœΌλ©΄ 0으둜 채움
409
+
410
  result_df.sort_values(by="ν† νƒˆμ›”κ²€μƒ‰λŸ‰", ascending=False, inplace=True)
411
+ debug_log(f"process_keyword μ΅œμ’… μ™„λ£Œ. DataFrame shape: {result_df.shape}")
412
+
413
+ # μ΅œμ’… 컬럼 μˆœμ„œ 및 쑴재 μ—¬λΆ€ 확인
414
+ final_columns = ["μ •λ³΄ν‚€μ›Œλ“œ", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"]
415
+ for col in final_columns:
416
+ if col not in result_df.columns:
417
+ result_df[col] = 0 if col != "μ •λ³΄ν‚€μ›Œλ“œ" else "" # μ—†λŠ” μ»¬λŸΌμ€ κΈ°λ³Έκ°’μœΌλ‘œ 채움
418
+
419
+ result_df = result_df[final_columns] # 컬럼 μˆœμ„œ κ³ μ •
420
+
421
  return result_df, create_excel_file(result_df)
422
 
423
+
424
  # --- ν˜•νƒœμ†Œ 뢄석과 κ²€μƒ‰λŸ‰/λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜ 병합 ---
425
def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
    """Run morphological analysis on *text* and enrich each extracted word
    with search-volume and blog-document-count data from the Naver APIs.

    Parameters:
        text: blog body text to analyze.
        remove_freq1: when True, drop words whose frequency is exactly 1.

    Returns:
        (DataFrame, excel_path) where the DataFrame always carries the
        columns ["단어", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰",
        "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"]; excel_path is "" when there is
        nothing to analyze.
    """
    debug_log("morphological_analysis_and_enrich ν•¨μˆ˜ μ‹œμž‘")

    result_columns = ["단어", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"]

    # Frequency analysis; the Excel path analyze_text returns is unused here.
    df_freq, _ = analyze_text(text)
    if df_freq.empty:
        debug_log("ν˜•νƒœμ†Œ 뢄석 κ²°κ³Όκ°€ 빈 λ°μ΄ν„°ν”„λ ˆμž„μž…λ‹ˆλ‹€.")
        return pd.DataFrame(columns=result_columns), ""

    if remove_freq1:
        before_count = len(df_freq)
        df_freq = df_freq[df_freq["λΉˆλ„μˆ˜"] > 1].copy()  # .copy() avoids chained-assignment warnings
        debug_log(f"λΉˆλ„μˆ˜ 1 제거 적용됨. {before_count} -> {len(df_freq)}")
        if df_freq.empty:
            debug_log("λΉˆλ„μˆ˜ 1 제거 ν›„ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.")
            return pd.DataFrame(columns=result_columns), ""

    # Look up search volume / blog counts for every analyzed word.
    keywords_from_morph = "\n".join(df_freq["단어"].tolist())
    debug_log(f"ν˜•νƒœμ†Œ 뢄석 기반 ν‚€μ›Œλ“œ ({len(df_freq['단어'])}개)에 λŒ€ν•œ 정보 쑰회 μ‹œμž‘")

    # Related keywords are intentionally excluded (include_related=False).
    df_keyword_info, _ = process_keyword(keywords_from_morph, include_related=False)
    debug_log("ν˜•νƒœμ†Œ 뢄석 ν‚€μ›Œλ“œμ— λŒ€ν•œ κ²€μƒ‰λŸ‰ 및 λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜ 쑰회 μ™„λ£Œ")

    if df_keyword_info.empty:
        debug_log("ν˜•νƒœμ†Œ 뢄석 ν‚€μ›Œλ“œμ— λŒ€ν•œ API 정보 쑰회 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
        merged_df = df_freq
        for api_col in ("PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"):
            merged_df[api_col] = None  # no API data available
    else:
        merged_df = pd.merge(df_freq, df_keyword_info, left_on="단어", right_on="μ •λ³΄ν‚€μ›Œλ“œ", how="left")
        # The API key column duplicates "단어" after the merge; drop it if present.
        merged_df.drop(columns=["μ •λ³΄ν‚€μ›Œλ“œ"], inplace=True, errors='ignore')

    # Guarantee every expected column exists, then fix the column order.
    for col in result_columns:
        if col not in merged_df.columns:
            merged_df[col] = 0 if col == "λΉˆλ„μˆ˜" else None
    merged_df = merged_df[result_columns]

    merged_excel_path = create_excel_file(merged_df)
    debug_log("morphological_analysis_and_enrich ν•¨μˆ˜ μ™„λ£Œ")
    return merged_df, merged_excel_path
471
 
472
+
473
  # --- 직접 ν‚€μ›Œλ“œ 뢄석 (단독 뢄석) ---
474
def direct_keyword_analysis(text: str, keyword_input: str):
    """Analyze user-supplied keywords against the blog body text.

    Each keyword (separated by newlines or commas in *keyword_input*) is
    counted in *text* with an exact, case-sensitive substring count, then
    enriched with search-volume and blog-document-count data via
    process_keyword.

    Parameters:
        text: blog body text to count keywords in.
        keyword_input: raw keyword string, newline- or comma-separated.

    Returns:
        (DataFrame, excel_path). The DataFrame always carries the columns
        ["ν‚€μ›Œλ“œ", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰",
        "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"]; excel_path is "" when no
        keywords were supplied.
    """
    debug_log("direct_keyword_analysis ν•¨μˆ˜ μ‹œμž‘")
    final_cols = ["ν‚€μ›Œλ“œ", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"]

    direct_keywords_list = [kw.strip() for kw in re.split(r'[\n,]+', keyword_input) if kw.strip()]
    debug_log(f"μž…λ ₯된 직접 ν‚€μ›Œλ“œ λͺ©λ‘: {direct_keywords_list}")

    if not direct_keywords_list:
        debug_log("직접 μž…λ ₯된 ν‚€μ›Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
        # FIX: return the full result schema (previously only
        # ["ν‚€μ›Œλ“œ", "λΉˆλ„μˆ˜"]) so the UI table layout stays consistent
        # with every other code path.
        return pd.DataFrame(columns=final_cols), ""

    # 1. Count occurrences of each keyword in the body text.
    results_freq = []
    for kw in direct_keywords_list:
        count = text.count(kw)  # case-sensitive, exact substring count
        results_freq.append({"ν‚€μ›Œλ“œ": kw, "λΉˆλ„μˆ˜": count})
        debug_log(f"직접 ν‚€μ›Œλ“œ '{kw}'의 λ³Έλ¬Έ λ‚΄ λΉˆλ„μˆ˜: {count}")
    df_direct_freq = pd.DataFrame(results_freq)

    # 2. Fetch search volume / blog counts via the API helper.
    #    Only per-keyword info is needed, so related keywords are excluded.
    keywords_for_api = "\n".join(direct_keywords_list)
    df_direct_api_info, _ = process_keyword(keywords_for_api, include_related=False)

    # 3. Merge the frequency counts with the API data.
    if not df_direct_api_info.empty:
        # Rename the API's key column so both frames merge on "ν‚€μ›Œλ“œ".
        df_direct_api_info.rename(columns={"μ •λ³΄ν‚€μ›Œλ“œ": "ν‚€μ›Œλ“œ"}, inplace=True)
        merged_df = pd.merge(df_direct_freq, df_direct_api_info, on="ν‚€μ›Œλ“œ", how="left")
    else:
        merged_df = df_direct_freq
        for col in ["PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"]:
            merged_df[col] = None  # no API data available

    # Ensure every expected column exists, then fix the column order.
    for col in final_cols:
        if col not in merged_df.columns:
            merged_df[col] = 0 if col != "ν‚€μ›Œλ“œ" else ""
    merged_df = merged_df[final_cols]

    excel_path = create_excel_file(merged_df)
    debug_log("direct_keyword_analysis ν•¨μˆ˜ μ™„λ£Œ")
    return merged_df, excel_path
517
+
518
 
519
  # --- 톡합 뢄석 (ν˜•νƒœμ†Œ 뢄석 + 직접 ν‚€μ›Œλ“œ 뢄석) ---
520
# --- Combined analysis (morphological analysis + direct keyword analysis) ---
def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
    """Merge morphological-analysis results with user-supplied direct keywords.

    Morphologically extracted words keep their analysis-based frequency; direct
    keywords missing from that result are appended with their raw substring
    counts. Every row gets a "μ§μ ‘μž…λ ₯" flag column ("μ§μ ‘μž…λ ₯" when the word
    was typed by the user, "" otherwise).

    Returns (DataFrame, excel_path); the DataFrame columns are
    ["단어", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰",
    "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜", "μ§μ ‘μž…λ ₯"].
    """
    debug_log("combined_analysis ν•¨μˆ˜ μ‹œμž‘")

    # 1. Morphological-analysis results, already enriched with API data.
    df_morph, _ = morphological_analysis_and_enrich(blog_text, remove_freq1)
    # df_morph columns: "단어", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"

    # 2. Parse the user-supplied direct keywords (newline- or comma-separated).
    direct_keywords_list = [kw.strip() for kw in re.split(r'[\n,]+', direct_keyword_input) if kw.strip()]
    debug_log(f"톡합 뢄석 - μž…λ ₯된 직접 ν‚€μ›Œλ“œ: {direct_keywords_list}")

    if not direct_keywords_list:  # no direct keywords: return the morphological result only
        if "μ§μ ‘μž…λ ₯" not in df_morph.columns and not df_morph.empty:
            df_morph["μ§μ ‘μž…λ ₯"] = ""  # add the flag column
        # Normalize column presence and order before returning.
        cols = ["단어", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜", "μ§μ ‘μž…λ ₯"]
        for col in cols:
            if col not in df_morph.columns:
                df_morph[col] = "" if col == "μ§μ ‘μž…λ ₯" else (0 if col != "단어" else "")
        df_morph = df_morph[cols]
        return df_morph, create_excel_file(df_morph)

    # Fetch info (frequency + API data) for the direct keywords.
    # direct_keyword_analysis uses a "ν‚€μ›Œλ“œ" column, so rename it to match
    # df_morph's "단어" before merging.
    df_direct_raw, _ = direct_keyword_analysis(blog_text, direct_keyword_input)
    # df_direct_raw columns: "ν‚€μ›Œλ“œ", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"
    df_direct_raw.rename(columns={"ν‚€μ›Œλ“œ": "단어"}, inplace=True)  # unify the key column name

    # Flag morphological rows that match a direct keyword.
    if not df_morph.empty:
        df_morph["μ§μ ‘μž…λ ₯"] = df_morph["단어"].apply(lambda x: "μ§μ ‘μž…λ ₯" if x in direct_keywords_list else "")
    else:  # the morphological result may be empty
        df_morph = pd.DataFrame(columns=["단어", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜", "μ§μ ‘μž…λ ₯"])

    # Every row coming from the direct list is flagged as user input.
    df_direct_raw["μ§μ ‘μž…λ ₯"] = "μ§μ ‘μž…λ ₯"

    # df_morph rows win for words present in both frames (their frequency comes
    # from morphological analysis, df_direct_raw's from a plain substring count);
    # only direct keywords absent from df_morph are appended.
    morph_words = df_morph['단어'].tolist() if not df_morph.empty else []

    rows_to_add = []
    for idx, row in df_direct_raw.iterrows():
        if row['단어'] not in morph_words:
            rows_to_add.append(row)

    if rows_to_add:
        df_to_add = pd.DataFrame(rows_to_add)
        combined_df = pd.concat([df_morph, df_to_add], ignore_index=True)
    else:
        combined_df = df_morph.copy()  # df_morph may itself be empty

    # Final column normalization and ordering.
    final_cols_combined = ["단어", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜", "μ§μ ‘μž…λ ₯"]
    for col in final_cols_combined:
        if col not in combined_df.columns:
            # Defaults: "μ§μ ‘μž…λ ₯" -> "", counts -> 0, API columns -> None
            # (API values may legitimately be missing).
            if col == "μ§μ ‘μž…λ ₯":
                combined_df[col] = ""
            elif col == "λΉˆλ„μˆ˜":
                combined_df[col] = 0
            elif col == "단어":
                combined_df[col] = ""
            else:  # API-derived columns
                combined_df[col] = None  # pd.NA would also work

    # API columns may hold non-numeric markers (e.g. "< 10") handled upstream in
    # process_keyword, so they are left as-is; Gradio renders None cleanly.
    # Frequency must be an integer.
    if "λΉˆλ„μˆ˜" in combined_df.columns:
        combined_df["λΉˆλ„μˆ˜"] = combined_df["λΉˆλ„μˆ˜"].fillna(0).astype(int)

    # Defensive de-duplication, then sort: user-entered keywords first, then by frequency.
    combined_df = combined_df[final_cols_combined].drop_duplicates(subset=['단어'], keep='first')
    combined_df.sort_values(by=["μ§μ ‘μž…λ ₯", "λΉˆλ„μˆ˜"], ascending=[False, False], inplace=True, na_position='last')
    combined_df.reset_index(drop=True, inplace=True)

    combined_excel = create_excel_file(combined_df)
    debug_log("combined_analysis ν•¨μˆ˜ μ™„λ£Œ")
    return combined_df, combined_excel
619
+
620
 
621
  # --- 뢄석 ν•Έλ“€λŸ¬ ---
622
def analysis_handler(blog_text: str, remove_freq1: bool, direct_keyword_input: str, direct_keyword_only: bool):
    """Dispatch the analyze-button click to the right analysis pipeline.

    Runs direct-keyword-only analysis when *direct_keyword_only* is set,
    otherwise the combined (morphological + direct) analysis. Empty input
    short-circuits to an empty table with the appropriate column schema.

    Returns (DataFrame, excel_path) for the Gradio outputs.
    """
    debug_log(f"analysis_handler ν•¨μˆ˜ μ‹œμž‘. 직접 ν‚€μ›Œλ“œλ§Œ 뢄석: {direct_keyword_only}")
    started = time.time()

    direct_cols = ["ν‚€μ›Œλ“œ", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜"]
    combined_cols = ["단어", "λΉˆλ„μˆ˜", "PCμ›”κ²€μƒ‰λŸ‰", "λͺ¨λ°”μΌμ›”κ²€μƒ‰λŸ‰", "ν† νƒˆμ›”κ²€μƒ‰λŸ‰", "λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜", "μ§μ ‘μž…λ ₯"]

    def _empty_result(columns):
        # Build an empty table with an explicit schema plus its Excel export.
        frame = pd.DataFrame(columns=columns)
        return frame, create_excel_file(frame)

    stripped = blog_text.strip() if blog_text else ""
    # Treat the placeholder text the same as no content at all.
    if not blog_text or stripped == "μŠ€ν¬λž˜ν•‘λœ λΈ”λ‘œκ·Έ λ‚΄μš©μ΄ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€." or stripped == "":
        debug_log("뢄석할 λΈ”λ‘œκ·Έ λ‚΄μš©μ΄ μ—†μŠ΅λ‹ˆλ‹€.")
        return _empty_result(direct_cols if direct_keyword_only else combined_cols)

    if direct_keyword_only:
        # "Direct keywords only" mode: require at least one keyword.
        if not direct_keyword_input or not direct_keyword_input.strip():
            debug_log("직접 ν‚€μ›Œλ“œλ§Œ 뢄석 μ„ νƒλ˜μ—ˆμœΌλ‚˜, μž…λ ₯된 직접 ν‚€μ›Œλ“œκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
            return _empty_result(direct_cols)
        result_df, excel_path = direct_keyword_analysis(blog_text, direct_keyword_input)
    else:
        # Default: combined morphological + direct-keyword analysis.
        result_df, excel_path = combined_analysis(blog_text, remove_freq1, direct_keyword_input)

    debug_log(f"analysis_handler 총 μ‹€ν–‰ μ‹œκ°„: {time.time() - started:.2f} 초")
    return result_df, excel_path
651
+
652
 
653
  # --- μŠ€ν¬λž˜ν•‘ μ‹€ν–‰ ---
654
def fetch_blog_content(url: str):
    """Validate *url*, scrape the Naver blog post, and return its body text.

    Returns a Korean error message string when the URL is missing or does
    not start with http:// or https://.
    """
    debug_log("fetch_blog_content ν•¨μˆ˜ μ‹œμž‘")

    # Input validation happens before any network access.
    if not url or not url.strip():
        return "λΈ”λ‘œκ·Έ URL을 μž…λ ₯ν•΄μ£Όμ„Έμš”."
    if not url.startswith(("http://", "https://")):
        return "μœ νš¨ν•œ URL ν˜•μ‹(http:// λ˜λŠ” https://)으둜 μž…λ ₯ν•΄μ£Όμ„Έμš”."

    started = time.time()
    content = scrape_naver_blog(url)
    debug_log(f"fetch_blog_content 총 μ‹€ν–‰ μ‹œκ°„: {time.time() - started:.2f} 초. λ‚΄μš© 길이: {len(content)}")
    return content
666
 
667
  # --- Custom CSS ---
668
  custom_css = """
669
  /* 전체 μ»¨ν…Œμ΄λ„ˆ μŠ€νƒ€μΌ */
670
  .gradio-container {
671
+ max-width: 1080px; /* λ„ˆλΉ„ ν™•μž₯ */
672
  margin: auto;
673
  font-family: 'Helvetica Neue', Arial, sans-serif;
674
  background: #f5f7fa;
 
702
  padding: 0.6rem 1.2rem;
703
  font-size: 1rem;
704
  cursor: pointer;
705
+ min-width: 150px; /* λ²„νŠΌ μ΅œμ†Œ λ„ˆλΉ„ */
706
  }
707
+ .custom-button:hover {
708
+ background-color: #0056b3;
709
+ }
710
+
711
 
712
  /* μ²΄ν¬λ°•μŠ€ μŠ€νƒ€μΌ */
713
  .custom-checkbox {
 
728
  """
729
 
730
  # --- Gradio μΈν„°νŽ˜μ΄μŠ€ ꡬ성 ---
731
# --- Gradio UI: left column = inputs/options, right column = results ---
with gr.Blocks(title="넀이버 λΈ”λ‘œκ·Έ ν‚€μ›Œλ“œ 뢄석 μ„œλΉ„μŠ€", css=custom_css) as demo:
    gr.HTML("<div class='custom-header'>넀이버 λΈ”λ‘œκ·Έ ν‚€μ›Œλ“œ 뢄석 μ„œλΉ„μŠ€</div>")

    with gr.Row():
        with gr.Column(scale=2):  # left column (input area)
            with gr.Group(elem_classes="custom-group"):
                # Blog URL input + scrape trigger.
                blog_url_input = gr.Textbox(
                    label="넀이버 λΈ”λ‘œκ·Έ 링크",
                    placeholder="예: https://blog.naver.com/아이디/κΈ€λ²ˆν˜Έ",
                    lines=1,
                    info="뢄석할 넀이버 λΈ”λ‘œκ·Έ κ²Œμ‹œλ¬Ό URL을 μž…λ ₯ν•˜μ„Έμš”."
                )
                with gr.Row(elem_classes="centered"):
                    scrape_button = gr.Button("λΈ”λ‘œκ·Έ λ‚΄μš© κ°€μ Έμ˜€κΈ°", elem_classes="custom-button", variant="primary")

            with gr.Group(elem_classes="custom-group"):
                # Scraped (or pasted) blog body; editable by the user.
                blog_content_box = gr.Textbox(
                    label="λΈ”λ‘œκ·Έ λ‚΄μš© (μˆ˜μ • κ°€λŠ₯)",
                    lines=10,
                    placeholder="μŠ€ν¬λž˜ν•‘λœ λΈ”λ‘œκ·Έ λ‚΄μš©μ΄ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€. 직접 μˆ˜μ •ν•˜κ±°λ‚˜ 뢙여넣을 수 μžˆμŠ΅λ‹ˆλ‹€."
                )

            with gr.Group(elem_classes="custom-group"):
                # Analysis options passed to analysis_handler.
                gr.Markdown("### 뢄석 μ˜΅μ…˜ μ„€μ •")
                with gr.Row():
                    remove_freq_checkbox = gr.Checkbox(
                        label="λΉˆλ„μˆ˜ 1인 단어 제거 (ν˜•νƒœμ†Œ 뢄석 μ‹œ)",
                        value=True,
                        elem_classes="custom-checkbox",
                        info="ν˜•νƒœμ†Œ 뢄석 κ²°κ³Όμ—μ„œ λΉˆλ„μˆ˜κ°€ 1인 단어λ₯Ό μ œμ™Έν•©λ‹ˆλ‹€."
                    )
                with gr.Row():
                    direct_keyword_only_checkbox = gr.Checkbox(
                        label="직접 ν‚€μ›Œλ“œλ§Œ 뢄석",
                        value=False,
                        elem_classes="custom-checkbox",
                        info="이 μ˜΅μ…˜μ„ μ„ νƒν•˜λ©΄ μ•„λž˜ μž…λ ₯ν•œ 직접 ν‚€μ›Œλ“œμ— λŒ€ν•΄μ„œλ§Œ 뢄석을 μˆ˜ν–‰ν•©λ‹ˆλ‹€ (ν˜•νƒœμ†Œ 뢄석 μƒλž΅)."
                    )
                with gr.Row():
                    direct_keyword_box = gr.Textbox(
                        label="직접 ν‚€μ›Œλ“œ μž…λ ₯ (μ—”ν„° λ˜λŠ” ','둜 ꡬ뢄)",
                        lines=3,
                        placeholder="예: ν‚€μ›Œλ“œ1, ν‚€μ›Œλ“œ2\nν‚€μ›Œλ“œ3\n...\n(ν˜•νƒœμ†Œ 뢄석 결과와 λ³„λ„λ‘œ λΆ„μ„ν•˜κ±°λ‚˜, 톡합 뢄석에 μΆ”κ°€ν•  ν‚€μ›Œλ“œ)",
                        info="뢄석에 ν¬ν•¨ν•˜κ±°λ‚˜ λ‹¨λ…μœΌλ‘œ 뢄석할 ν‚€μ›Œλ“œλ₯Ό 직접 μž…λ ₯ν•©λ‹ˆλ‹€."
                    )

            with gr.Group(elem_classes="custom-group"):
                with gr.Row(elem_classes="centered"):
                    analyze_button = gr.Button("ν‚€μ›Œλ“œ 뢄석 μ‹€ν–‰", elem_classes="custom-button", variant="primary")

        with gr.Column(scale=3):  # right column (result area)
            with gr.Group(elem_classes="custom-group custom-result"):
                gr.Markdown("### 뢄석 κ²°κ³Ό")
                result_df_display = gr.Dataframe(
                    label="톡합 뢄석 κ²°κ³Ό (단어, λΉˆλ„μˆ˜, κ²€μƒ‰λŸ‰, λΈ”λ‘œκ·Έλ¬Έμ„œμˆ˜, μ§μ ‘μž…λ ₯ μ—¬λΆ€)",
                    interactive=False,  # read-only for the user
                    height=600,  # fixed table height
                    wrap=True  # wrap long cell text
                )
            with gr.Group(elem_classes="custom-group"):
                gr.Markdown("### κ²°κ³Ό λ‹€μš΄λ‘œλ“œ")
                excel_file_display = gr.File(label="뢄석 κ²°κ³Ό Excel 파일 λ‹€μš΄λ‘œλ“œ")

    # Event wiring: scrape fills the content box; analyze produces table + Excel.
    scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
    analyze_button.click(
        fn=analysis_handler,
        inputs=[blog_content_box, remove_freq_checkbox, direct_keyword_box, direct_keyword_only_checkbox],
        outputs=[result_df_display, excel_file_display]
    )
801
 
802
  if __name__ == "__main__":
803
+ # ν™˜κ²½ λ³€μˆ˜ μ„€μ • μ˜ˆμ‹œ (μ‹€μ œ μ‹€ν–‰ μ‹œμ—λŠ” μ‹œμŠ€ν…œ ν™˜κ²½ λ³€μˆ˜λ‘œ μ„€μ •ν•˜κ±°λ‚˜, .env 파일 등을 μ‚¬μš©)
804
+ # os.environ["NAVER_API_KEY"] = "YOUR_NAVER_API_KEY"
805
+ # os.environ["NAVER_SECRET_KEY"] = "YOUR_NAVER_SECRET_KEY"
806
+ # os.environ["NAVER_CUSTOMER_ID"] = "YOUR_NAVER_CUSTOMER_ID"
807
+ # os.environ["NAVER_SEARCH_CLIENT_ID"] = "YOUR_NAVER_SEARCH_CLIENT_ID"
808
+ # os.environ["NAVER_SEARCH_CLIENT_SECRET"] = "YOUR_NAVER_SEARCH_CLIENT_SECRET"
809
+
810
+ # ν™˜κ²½ λ³€μˆ˜ μ„€μ • 확인
811
+ required_env_vars = [
812
+ "NAVER_API_KEY", "NAVER_SECRET_KEY", "NAVER_CUSTOMER_ID",
813
+ "NAVER_SEARCH_CLIENT_ID", "NAVER_SEARCH_CLIENT_SECRET"
814
+ ]
815
+ missing_vars = [var for var in required_env_vars if not os.environ.get(var)]
816
+ if missing_vars:
817
+ debug_log(f"κ²½κ³ : λ‹€μŒ ν•„μˆ˜ ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€ - {', '.join(missing_vars)}")
818
+ debug_log("API 호좜 κΈ°λŠ₯이 μ •μƒμ μœΌλ‘œ λ™μž‘ν•˜μ§€ μ•Šμ„ 수 μžˆμŠ΅λ‹ˆλ‹€.")
819
+ debug_log("슀크립트 μ‹€ν–‰ 전에 ν•΄λ‹Ή ν™˜κ²½ λ³€μˆ˜λ₯Ό μ„€μ •ν•΄μ£Όμ„Έμš”.")
820
+ # Gradio 앱은 μ‹€ν–‰ν•˜λ˜, API 호좜 μ‹œ 였λ₯˜κ°€ λ°œμƒν•  수 μžˆμŒμ„ μ‚¬μš©μžμ—κ²Œ μ•Œλ¦Ό.
821
+
822
  debug_log("Gradio μ•± μ‹€ν–‰ μ‹œμž‘")
823
+ demo.launch(debug=True) # 개발 μ€‘μ—λŠ” debug=True둜 μ„€μ •ν•˜μ—¬ 였λ₯˜ 확인 용이
824
+ debug_log("Gradio μ•± μ‹€ν–‰ μ’…λ£Œ")