import gradio as gr
import requests
import aiohttp
import asyncio
from bs4 import BeautifulSoup
import urllib.parse  # used to resolve the iframe path
import re
import logging
import tempfile
import pandas as pd
import mecab  # uses the python-mecab-ko library
import os
import time
import hmac
import hashlib
import base64
# Debug logging helper
def debug_log(message: str):
    print(f"[DEBUG] {message}")
# --- ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํ•‘ (๋น„๋™๊ธฐ ๋ฒ„์ „) ---
async def scrape_naver_blog(url: str) -> str:
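    """Fetch a Naver blog post and return its title and body text.

    Naver blog pages embed the actual post in an iframe (#mainFrame), so
    this fetches the outer page first, resolves the iframe URL, then
    fetches and parses the inner document.
    """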
    debug_log("scrape_naver_blog started")
    debug_log(f"Requested URL: {url}")
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/96.0.4664.110 Safari/537.36"
        )
    }
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                debug_log("HTTP GET request (main page) complete")
                if response.status != 200:
                    debug_log(f"Request failed, status code: {response.status}")
                    return f"An error occurred. Status code: {response.status}"
                html = await response.text()

        soup = BeautifulSoup(html, "html.parser")
        debug_log("HTML parsing (main page) complete")

        # The actual post lives inside the iframe#mainFrame element.
        iframe = soup.select_one("iframe#mainFrame")
        if not iframe:
            debug_log("Could not find the iframe#mainFrame tag.")
            return "Could not find the content iframe."
        iframe_src = iframe.get("src")
        if not iframe_src:
            debug_log("The iframe has no src attribute.")
            return "Could not find the src of the content iframe."

        parsed_iframe_url = urllib.parse.urljoin(url, iframe_src)
        debug_log(f"iframe page request URL: {parsed_iframe_url}")

        async with aiohttp.ClientSession() as session:
            async with session.get(parsed_iframe_url, headers=headers) as iframe_response:
                debug_log("HTTP GET request (iframe page) complete")
                if iframe_response.status != 200:
                    debug_log(f"iframe request failed, status code: {iframe_response.status}")
                    return f"An error occurred in the iframe. Status code: {iframe_response.status}"
                iframe_html = await iframe_response.text()

        iframe_soup = BeautifulSoup(iframe_html, "html.parser")
        debug_log("HTML parsing (iframe page) complete")

        title_div = iframe_soup.select_one('.se-module.se-module-text.se-title-text')
        title = title_div.get_text(strip=True) if title_div else "Title not found."
        debug_log(f"Extracted title: {title}")

        content_div = iframe_soup.select_one('.se-main-container')
        if content_div:
            content = content_div.get_text("\n", strip=True)
        else:
            content = "Content not found."
        debug_log("Content extraction complete")

        result = f"[Title]\n{title}\n\n[Body]\n{content}"
        debug_log("Title and body combined")
        return result
    except Exception as e:
        debug_log(f"Error occurred: {str(e)}")
        return f"An error occurred while scraping: {str(e)}"
# --- ๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ ๋ฐ ๊ด‘๊ณ  API ๊ด€๋ จ ---
def generate_signature(timestamp, method, uri, secret_key):
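    """Build the Naver Ads API request signature: an HMAC-SHA256 digest of
    "{timestamp}.{method}.{uri}" keyed with the secret key, base64-encoded.
    """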
    message = f"{timestamp}.{method}.{uri}"
    digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).digest()
    return base64.b64encode(digest).decode()
def get_header(method, uri, api_key, secret_key, customer_id):
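    """Assemble the authentication headers required by the Naver Ads API."""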
    timestamp = str(round(time.time() * 1000))
    signature = generate_signature(timestamp, method, uri, secret_key)
    return {
        "Content-Type": "application/json; charset=UTF-8",
        "X-Timestamp": timestamp,
        "X-API-KEY": api_key,
        "X-Customer": str(customer_id),
        "X-Signature": signature
    }
# --- Related keyword lookup (async) ---
async def fetch_related_keywords(keyword):
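    """Query the Naver Ads keyword tool for keywords related to `keyword`.

    Returns a DataFrame with each keyword and its monthly PC/mobile/total
    search volumes (at most 100 rows), or an empty DataFrame on failure.
    """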
debug_log(f"fetch_related_keywords ํ˜ธ์ถœ, ํ‚ค์›Œ๋“œ: {keyword}")
API_KEY = os.environ["NAVER_API_KEY"]
SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
BASE_URL = "https://api.naver.com"
uri = "/keywordstool"
method = "GET"
headers = get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
params = {
"hintKeywords": [keyword],
"showDetail": "1"
}
async with aiohttp.ClientSession() as session:
async with session.get(BASE_URL + uri, headers=headers, params=params) as response:
data = await response.json()
if "keywordList" not in data:
return pd.DataFrame()
df = pd.DataFrame(data["keywordList"])
if len(df) > 100:
df = df.head(100)
def parse_count(x):
try:
return int(str(x).replace(",", ""))
except:
return 0
df["PC์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["monthlyPcQcCnt"].apply(parse_count)
df["๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["monthlyMobileQcCnt"].apply(parse_count)
df["ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰"] = df["PC์›”๊ฒ€์ƒ‰๋Ÿ‰"] + df["๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰"]
df.rename(columns={"relKeyword": "์ •๋ณดํ‚ค์›Œ๋“œ"}, inplace=True)
result_df = df[["์ •๋ณดํ‚ค์›Œ๋“œ", "PC์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰"]]
debug_log("fetch_related_keywords ์™„๋ฃŒ")
return result_df
# --- Blog post count lookup (async) ---
async def fetch_blog_count(keyword):
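    """Return the total number of Naver blog posts matching `keyword`,
    using the Naver Open API blog search endpoint (0 on failure)."""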
debug_log(f"fetch_blog_count ํ˜ธ์ถœ, ํ‚ค์›Œ๋“œ: {keyword}")
client_id = os.environ["NAVER_SEARCH_CLIENT_ID"]
client_secret = os.environ["NAVER_SEARCH_CLIENT_SECRET"]
url = "https://openapi.naver.com/v1/search/blog.json"
headers = {
"X-Naver-Client-Id": client_id,
"X-Naver-Client-Secret": client_secret
}
params = {"query": keyword, "display": 1}
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers, params=params) as response:
if response.status == 200:
data = await response.json()
debug_log(f"fetch_blog_count ๊ฒฐ๊ณผ: {data.get('total', 0)}")
return data.get("total", 0)
else:
debug_log(f"fetch_blog_count ์˜ค๋ฅ˜, ์ƒํƒœ์ฝ”๋“œ: {response.status}")
return 0
def create_excel_file(df):
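    """Write `df` to a temporary .xlsx file and return the file path."""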
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
        excel_path = tmp.name
    df.to_excel(excel_path, index=False, engine='openpyxl')
    debug_log(f"Excel file created: {excel_path}")
    return excel_path
# --- Keyword search (async) ---
async def process_keyword(keywords: str, include_related: bool):
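    """Look up search volumes and blog post counts for newline-separated keywords.

    When `include_related` is set, related keywords returned for the first
    input keyword are appended to the result. Returns the DataFrame and the
    path of an Excel export of it.
    """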
debug_log(f"process_keyword ํ˜ธ์ถœ, ํ‚ค์›Œ๋“œ๋“ค: {keywords}, ์—ฐ๊ด€๊ฒ€์ƒ‰์–ด ํฌํ•จ: {include_related}")
input_keywords = [k.strip() for k in keywords.splitlines() if k.strip()]
result_dfs = []
for idx, kw in enumerate(input_keywords):
df_kw = await fetch_related_keywords(kw)
if df_kw.empty:
continue
row_kw = df_kw[df_kw["์ •๋ณดํ‚ค์›Œ๋“œ"] == kw]
if not row_kw.empty:
result_dfs.append(row_kw)
else:
result_dfs.append(df_kw.head(1))
if include_related and idx == 0:
df_related = df_kw[df_kw["์ •๋ณดํ‚ค์›Œ๋“œ"] != kw]
if not df_related.empty:
result_dfs.append(df_related)
if result_dfs:
result_df = pd.concat(result_dfs, ignore_index=True)
result_df.drop_duplicates(subset=["์ •๋ณดํ‚ค์›Œ๋“œ"], inplace=True)
else:
result_df = pd.DataFrame(columns=["์ •๋ณดํ‚ค์›Œ๋“œ", "PC์›”๊ฒ€์ƒ‰๋Ÿ‰", "๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", "ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰"])
# ๋ธ”๋กœ๊ทธ ๋ฌธ์„œ์ˆ˜ ์กฐํšŒ๋ฅผ ๋ณ‘๋ ฌ๋กœ ์ฒ˜๋ฆฌ
tasks = [fetch_blog_count(kw) for kw in result_df["์ •๋ณดํ‚ค์›Œ๋“œ"]]
counts = await asyncio.gather(*tasks)
result_df["๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜"] = counts
result_df.sort_values(by="ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", ascending=False, inplace=True)
debug_log("process_keyword ์™„๋ฃŒ")
return result_df, create_excel_file(result_df)
# --- Morphological analysis (reference code 1, synchronous) ---
def analyze_text(text: str):
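    """Count noun frequencies in `text` using MeCab (python-mecab-ko).

    Non-Hangul characters are stripped first; only tokens whose POS tag
    starts with "NN" (nouns) are counted. Returns the frequency DataFrame
    and the path of an Excel export of it.
    """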
    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__name__)
    logger.debug("Original text: %s", text)

    # Keep Hangul syllables only before tokenizing.
    filtered_text = re.sub(r'[^가-힣]', '', text)
    logger.debug("Filtered text: %s", filtered_text)
    if not filtered_text:
        logger.debug("No valid Korean text.")
        return pd.DataFrame(columns=["Word", "Frequency"]), ""

    mecab_instance = mecab.MeCab()
    tokens = mecab_instance.pos(filtered_text)
    logger.debug("POS tagging result: %s", tokens)

    freq = {}
    for word, pos in tokens:
        if word and word.strip() and pos.startswith("NN"):
            freq[word] = freq.get(word, 0) + 1
            logger.debug("Word: %s, POS: %s, count: %d", word, pos, freq[word])

    sorted_freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
    logger.debug("Sorted word frequencies: %s", sorted_freq)
    df = pd.DataFrame(sorted_freq, columns=["Word", "Frequency"])
    logger.debug("Morphological analysis DataFrame created, shape: %s", df.shape)

    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
    df.to_excel(temp_file.name, index=False, engine='openpyxl')
    temp_file.close()
    logger.debug("Excel file created: %s", temp_file.name)
    return df, temp_file.name
# --- Merge morphological analysis with search volume / blog post counts (async) ---
async def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
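    """Run noun-frequency analysis on `text`, then enrich each word with its
    search volumes and blog post count. Optionally drops frequency-1 words.
    Returns the merged DataFrame and the path of an Excel export of it.
    """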
debug_log("morphological_analysis_and_enrich ํ•จ์ˆ˜ ์‹œ์ž‘")
df_freq, _ = analyze_text(text)
if df_freq.empty:
debug_log("ํ˜•ํƒœ์†Œ ๋ถ„์„ ๊ฒฐ๊ณผ๊ฐ€ ๋นˆ ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์ž…๋‹ˆ๋‹ค.")
return df_freq, ""
if remove_freq1:
before_shape = df_freq.shape
df_freq = df_freq[df_freq["๋นˆ๋„์ˆ˜"] != 1]
debug_log(f"๋นˆ๋„์ˆ˜ 1 ์ œ๊ฑฐ ์ ์šฉ๋จ. {before_shape} -> {df_freq.shape}")
keywords = "\n".join(df_freq["๋‹จ์–ด"].tolist())
debug_log(f"๋ถ„์„๋œ ํ‚ค์›Œ๋“œ: {keywords}")
df_keyword_info, _ = await process_keyword(keywords, include_related=False)
debug_log("๊ฒ€์ƒ‰๋Ÿ‰ ๋ฐ ๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜ ์กฐํšŒ ์™„๋ฃŒ")
merged_df = pd.merge(df_freq, df_keyword_info, left_on="๋‹จ์–ด", right_on="์ •๋ณดํ‚ค์›Œ๋“œ", how="left")
merged_df.drop(columns=["์ •๋ณดํ‚ค์›Œ๋“œ"], inplace=True)
merged_excel_path = create_excel_file(merged_df)
debug_log("morphological_analysis_and_enrich ํ•จ์ˆ˜ ์™„๋ฃŒ")
return merged_df, merged_excel_path
# --- ์ง์ ‘ ํ‚ค์›Œ๋“œ ๋ถ„์„ (๋‹จ๋… ๋ถ„์„, ๋น„๋™๊ธฐ) ---
async def direct_keyword_analysis(text: str, keyword_input: str):
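    """Count how often each user-supplied keyword occurs in `text`.

    Keywords may be separated by newlines or commas. Returns the frequency
    DataFrame and the path of an Excel export of it.
    """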
debug_log("direct_keyword_analysis ํ•จ์ˆ˜ ์‹œ์ž‘")
keywords = re.split(r'[\n,]+', keyword_input)
keywords = [kw.strip() for kw in keywords if kw.strip()]
debug_log(f"์ž…๋ ฅ๋œ ํ‚ค์›Œ๋“œ ๋ชฉ๋ก: {keywords}")
results = []
for kw in keywords:
count = text.count(kw)
results.append((kw, count))
debug_log(f"ํ‚ค์›Œ๋“œ '{kw}'์˜ ๋นˆ๋„์ˆ˜: {count}")
# ์ง์ ‘ ์ž…๋ ฅ ํ‚ค์›Œ๋“œ๊ฐ€ ๋ณธ๋ฌธ์— ์—†์œผ๋ฉด ์ถ”๊ฐ€ ์กฐํšŒ
if kw not in text:
df_direct, _ = await process_keyword(kw, include_related=False)
if (not df_direct.empty) and (kw in df_direct["์ •๋ณดํ‚ค์›Œ๋“œ"].values):
row = df_direct[df_direct["์ •๋ณดํ‚ค์›Œ๋“œ"] == kw].iloc[0]
pc = row.get("PC์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
mobile = row.get("๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
total = row.get("ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
blog_count = row.get("๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜", None)
else:
pc = mobile = total = blog_count = None
# ๊ฒฐ๊ณผ์— ์ƒˆ ํ–‰ ์ถ”๊ฐ€
results.append((kw, count))
df = pd.DataFrame(results, columns=["ํ‚ค์›Œ๋“œ", "๋นˆ๋„์ˆ˜"])
excel_path = create_excel_file(df)
debug_log("direct_keyword_analysis ํ•จ์ˆ˜ ์™„๋ฃŒ")
return df, excel_path
# --- Combined analysis (morphological + direct keyword, async) ---
async def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
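    """Run the enriched morphological analysis, then mark or append the
    user-supplied direct keywords. Direct keywords missing from the noun
    table get their own row with a count and search data. Returns the
    combined DataFrame and the path of an Excel export of it.
    """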
debug_log("combined_analysis ํ•จ์ˆ˜ ์‹œ์ž‘")
merged_df, _ = await morphological_analysis_and_enrich(blog_text, remove_freq1)
if "์ง์ ‘์ž…๋ ฅ" not in merged_df.columns:
merged_df["์ง์ ‘์ž…๋ ฅ"] = ""
direct_keywords = re.split(r'[\n,]+', direct_keyword_input)
direct_keywords = [kw.strip() for kw in direct_keywords if kw.strip()]
debug_log(f"์ž…๋ ฅ๋œ ์ง์ ‘ ํ‚ค์›Œ๋“œ: {direct_keywords}")
for dk in direct_keywords:
if dk in merged_df["๋‹จ์–ด"].values:
merged_df.loc[merged_df["๋‹จ์–ด"] == dk, "์ง์ ‘์ž…๋ ฅ"] = "์ง์ ‘์ž…๋ ฅ"
else:
freq = blog_text.count(dk)
df_direct, _ = await process_keyword(dk, include_related=False)
if (not df_direct.empty) and (dk in df_direct["์ •๋ณดํ‚ค์›Œ๋“œ"].values):
row = df_direct[df_direct["์ •๋ณดํ‚ค์›Œ๋“œ"] == dk].iloc[0]
pc = row.get("PC์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
mobile = row.get("๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
total = row.get("ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰", None)
blog_count = row.get("๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜", None)
else:
pc = mobile = total = blog_count = None
new_row = {
"๋‹จ์–ด": dk,
"๋นˆ๋„์ˆ˜": freq,
"PC์›”๊ฒ€์ƒ‰๋Ÿ‰": pc,
"๋ชจ๋ฐ”์ผ์›”๊ฒ€์ƒ‰๋Ÿ‰": mobile,
"ํ† ํƒˆ์›”๊ฒ€์ƒ‰๋Ÿ‰": total,
"๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜": blog_count,
"์ง์ ‘์ž…๋ ฅ": "์ง์ ‘์ž…๋ ฅ"
}
merged_df = pd.concat([merged_df, pd.DataFrame([new_row])], ignore_index=True)
merged_df = merged_df.sort_values(by="๋นˆ๋„์ˆ˜", ascending=False).reset_index(drop=True)
combined_excel = create_excel_file(merged_df)
debug_log("combined_analysis ํ•จ์ˆ˜ ์™„๋ฃŒ")
return merged_df, combined_excel
# --- Analysis handler (async) ---
async def analysis_handler(blog_text: str, remove_freq1: bool, direct_keyword_input: str, direct_keyword_only: bool):
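    """Dispatch to direct-keyword-only or combined analysis based on the checkbox."""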
debug_log("analysis_handler ํ•จ์ˆ˜ ์‹œ์ž‘")
if direct_keyword_only:
return await direct_keyword_analysis(blog_text, direct_keyword_input)
else:
return await combined_analysis(blog_text, remove_freq1, direct_keyword_input)
# --- ์Šคํฌ๋ž˜ํ•‘ ์‹คํ–‰ ํ•ธ๋“ค๋Ÿฌ (๋น„๋™๊ธฐ) ---
async def fetch_blog_content(url: str):
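    """Gradio handler: scrape the given blog URL and return its text."""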
debug_log("fetch_blog_content ํ•จ์ˆ˜ ์‹œ์ž‘")
content = await scrape_naver_blog(url)
debug_log("fetch_blog_content ํ•จ์ˆ˜ ์™„๋ฃŒ")
return content
# --- Custom CSS ---
custom_css = """
/* Overall container style */
.gradio-container {
max-width: 960px;
margin: auto;
font-family: 'Helvetica Neue', Arial, sans-serif;
background: #f5f7fa;
padding: 2rem;
}
/* ํ—ค๋” ์Šคํƒ€์ผ */
.custom-header {
text-align: center;
font-size: 2.5rem;
font-weight: bold;
margin-bottom: 1.5rem;
color: #333;
}
/* Group box style */
.custom-group {
background: #ffffff;
border-radius: 8px;
padding: 1.5rem;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
margin-bottom: 1.5rem;
}
/* Button style */
.custom-button {
background-color: #007bff;
color: #fff;
border: none;
border-radius: 4px;
padding: 0.6rem 1.2rem;
font-size: 1rem;
cursor: pointer;
transition: background-color 0.3s;
}
.custom-button:hover {
background-color: #0056b3;
}
/* ์ฒดํฌ๋ฐ•์Šค ์Šคํƒ€์ผ */
.custom-checkbox {
margin-right: 1rem;
font-size: 1rem;
font-weight: bold;
}
/* Result table and download button */
.custom-result {
margin-top: 1.5rem;
}
/* ๊ฐ€์šด๋ฐ ์ •๋ ฌ */
.centered {
display: flex;
justify-content: center;
align-items: center;
}
/* Usage instructions style */
.usage-instructions {
font-size: 1.1rem;
line-height: 1.6;
color: #555;
background: #fff;
padding: 1.5rem;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
margin-top: 2rem;
}
.usage-instructions h2 {
font-size: 1.8rem;
font-weight: bold;
margin-bottom: 1rem;
color: #333;
}
.usage-instructions ul {
list-style: disc;
margin-left: 2rem;
}
"""
# --- Gradio interface ---
with gr.Blocks(title="Naver Blog Morphological Analysis Service", css=custom_css) as demo:
    gr.HTML("<div class='custom-header'>Naver Blog Morphological Analysis Service 🚀</div>")
    with gr.Group(elem_classes="custom-group"):
        with gr.Row():
            blog_url_input = gr.Textbox(label="Naver blog link", placeholder="e.g. https://blog.naver.com/ssboost/222983068507", lines=1)
        with gr.Row(elem_classes="centered"):
            scrape_button = gr.Button("Run scraping", elem_classes="custom-button")

    with gr.Group(elem_classes="custom-group"):
        blog_content_box = gr.Textbox(label="Blog content (editable)", lines=10, placeholder="The scraped blog content will appear here.")

    with gr.Group(elem_classes="custom-group"):
        with gr.Row():
            remove_freq_checkbox = gr.Checkbox(label="Remove frequency-1 words", value=True, elem_classes="custom-checkbox")
        with gr.Row():
            direct_keyword_only_checkbox = gr.Checkbox(label="Analyze direct keywords only", value=False, elem_classes="custom-checkbox")
        with gr.Row():
            direct_keyword_box = gr.Textbox(label="Direct keyword input (separate with Enter or ',')", lines=2, placeholder="e.g. keyword1, keyword2\nkeyword3")

    with gr.Group(elem_classes="custom-group"):
        with gr.Row(elem_classes="centered"):
            analyze_button = gr.Button("Run analysis", elem_classes="custom-button")

    with gr.Group(elem_classes="custom-group custom-result"):
        result_df = gr.Dataframe(label="Combined analysis results (word, frequency, search volume, blog post count, direct input)", interactive=True)

    with gr.Group(elem_classes="custom-group"):
        excel_file = gr.File(label="Excel download")
    with gr.Group(elem_classes="custom-group"):
        usage_html = gr.HTML("""
        <div class="usage-instructions">
        <h2>How to Use 📖</h2>
        <ul>
          <li>🔗 <strong>Naver blog link</strong>: Enter the URL of the Naver blog post to analyze.</li>
          <li>✂️ <strong>Run scraping</strong>: After entering the link, click the button to load the post's title and body automatically.</li>
          <li>📝 <strong>Blog content (editable)</strong>: The loaded blog content is shown here and can be edited as needed.</li>
          <li>⚙️ <strong>Options</strong>:
            <ul>
              <li><em>Remove frequency-1 words</em>: Selected by default; words that occur only once are excluded from the results.</li>
              <li><em>Analyze direct keywords only</em>: When selected, only the keywords you entered are counted in the blog text.</li>
            </ul>
          </li>
          <li>🔤 <strong>Direct keyword input</strong>: Enter the keywords to analyze, separated by Enter or commas (,).</li>
          <li>🚀 <strong>Run analysis</strong>: Morphological and keyword analysis run according to your options, and the results are shown as a table and an Excel file.</li>
          <li>📥 <strong>Excel download</strong>: Download the analysis results as an Excel file.</li>
        </ul>
        <p><strong>Tip:</strong> Results update in real time; you can edit the content and analyze again. Happy analyzing! 😊</p>
        </div>
        """)
    # Event wiring (async handlers)
    scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
    analyze_button.click(fn=analysis_handler,
                         inputs=[blog_content_box, remove_freq_checkbox, direct_keyword_box, direct_keyword_only_checkbox],
                         outputs=[result_df, excel_file])
if __name__ == "__main__":
    debug_log("Launching Gradio app")
    demo.launch()
    debug_log("Gradio app stopped")