# NOTE: removed "Spaces / Sleeping" banner lines — HuggingFace Spaces UI
# capture residue, not part of the application source.
import gradio as gr | |
import requests | |
import aiohttp | |
import asyncio | |
from bs4 import BeautifulSoup | |
import urllib.parse # iframe ๊ฒฝ๋ก ๋ณด์ ์ ์ํ ๋ชจ๋ | |
import re | |
import logging | |
import tempfile | |
import pandas as pd | |
import mecab # pythonโmecabโko ๋ผ์ด๋ธ๋ฌ๋ฆฌ ์ฌ์ฉ | |
import os | |
import time | |
import hmac | |
import hashlib | |
import base64 | |
# Lightweight stdout logger used throughout the app.
def debug_log(message: str):
    """Print *message* to stdout with a [DEBUG] prefix."""
    print("[DEBUG] {}".format(message))
# --- Naver blog scraping (async) ---
async def scrape_naver_blog(url: str) -> str:
    """Scrape a Naver blog post and return its title + body as one string.

    Naver blog pages embed the actual post inside ``iframe#mainFrame``,
    so the outer page is fetched first, the iframe URL resolved, and the
    inner document parsed for the title/body containers.

    Returns the combined text, or a Korean error message on any failure
    (this function never raises).
    """
    debug_log("scrape_naver_blog ํจ์ ์์")
    debug_log(f"์์ฒญ๋ฐ์ URL: {url}")
    # Desktop Chrome UA — Naver serves different markup to unknown clients.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/96.0.4664.110 Safari/537.36"
        )
    }
    try:
        # Fix: reuse ONE ClientSession for both requests; the previous
        # revision opened a second session (second connection pool) just
        # to fetch the iframe document.
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                debug_log("HTTP GET ์์ฒญ(๋ฉ์ธ ํ์ด์ง) ์๋ฃ")
                if response.status != 200:
                    debug_log(f"์์ฒญ ์คํจ, ์ํ์ฝ๋: {response.status}")
                    return f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ์ํ์ฝ๋: {response.status}"
                html = await response.text()

            soup = BeautifulSoup(html, "html.parser")
            debug_log("HTML ํ์ฑ(๋ฉ์ธ ํ์ด์ง) ์๋ฃ")
            iframe = soup.select_one("iframe#mainFrame")
            if not iframe:
                debug_log("iframe#mainFrame ํ๊ทธ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.")
                return "๋ณธ๋ฌธ iframe์ ์ฐพ์ ์ ์์ต๋๋ค."
            iframe_src = iframe.get("src")
            if not iframe_src:
                debug_log("iframe src๊ฐ ์กด์ฌํ์ง ์์ต๋๋ค.")
                return "๋ณธ๋ฌธ iframe์ src๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
            # src may be relative — resolve it against the original URL.
            parsed_iframe_url = urllib.parse.urljoin(url, iframe_src)
            debug_log(f"iframe ํ์ด์ง ์์ฒญ URL: {parsed_iframe_url}")

            async with session.get(parsed_iframe_url, headers=headers) as iframe_response:
                debug_log("HTTP GET ์์ฒญ(iframe ํ์ด์ง) ์๋ฃ")
                if iframe_response.status != 200:
                    debug_log(f"iframe ์์ฒญ ์คํจ, ์ํ์ฝ๋: {iframe_response.status}")
                    return f"iframe์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ์ํ์ฝ๋: {iframe_response.status}"
                iframe_html = await iframe_response.text()

        iframe_soup = BeautifulSoup(iframe_html, "html.parser")
        debug_log("HTML ํ์ฑ(iframe ํ์ด์ง) ์๋ฃ")
        # SmartEditor ONE markup: dedicated title / body containers.
        title_div = iframe_soup.select_one('.se-module.se-module-text.se-title-text')
        title = title_div.get_text(strip=True) if title_div else "์ ๋ชฉ์ ์ฐพ์ ์ ์์ต๋๋ค."
        debug_log(f"์ถ์ถ๋ ์ ๋ชฉ: {title}")
        content_div = iframe_soup.select_one('.se-main-container')
        if content_div:
            content = content_div.get_text("\n", strip=True)
        else:
            content = "๋ณธ๋ฌธ์ ์ฐพ์ ์ ์์ต๋๋ค."
        debug_log("๋ณธ๋ฌธ ์ถ์ถ ์๋ฃ")
        result = f"[์ ๋ชฉ]\n{title}\n\n[๋ณธ๋ฌธ]\n{content}"
        debug_log("์ ๋ชฉ๊ณผ ๋ณธ๋ฌธ ํฉ์นจ ์๋ฃ")
        return result
    except Exception as e:
        # Broad by design: any failure is reported to the UI as text.
        debug_log(f"์๋ฌ ๋ฐ์: {str(e)}")
        return f"์คํฌ๋ํ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"
# --- Naver Search Ad API request signing ---
def generate_signature(timestamp, method, uri, secret_key):
    """Return the base64 HMAC-SHA256 signature the Naver ad API expects.

    The signed payload is the string "{timestamp}.{method}.{uri}".
    """
    payload = f"{timestamp}.{method}.{uri}".encode("utf-8")
    mac = hmac.new(secret_key.encode("utf-8"), payload, hashlib.sha256)
    return base64.b64encode(mac.digest()).decode()
def get_header(method, uri, api_key, secret_key, customer_id):
    """Build the authentication headers for a Naver Search Ad API call."""
    now_millis = str(round(time.time() * 1000))  # epoch milliseconds, as text
    return {
        "Content-Type": "application/json; charset=UTF-8",
        "X-Timestamp": now_millis,
        "X-API-KEY": api_key,
        "X-Customer": str(customer_id),
        "X-Signature": generate_signature(now_millis, method, uri, secret_key),
    }
# --- Related-keyword lookup via the Naver keywordstool API (async) ---
async def fetch_related_keywords(keyword):
    """Fetch up to 100 related keywords with monthly search volumes.

    Reads credentials from the NAVER_API_KEY / NAVER_SECRET_KEY /
    NAVER_CUSTOMER_ID environment variables (raises KeyError if unset).
    Returns a DataFrame with the keyword plus PC / mobile / total monthly
    search counts, or an empty DataFrame when the API returns no list.
    """
    debug_log(f"fetch_related_keywords ํธ์ถ, ํค์๋: {keyword}")
    API_KEY = os.environ["NAVER_API_KEY"]
    SECRET_KEY = os.environ["NAVER_SECRET_KEY"]
    CUSTOMER_ID = os.environ["NAVER_CUSTOMER_ID"]
    BASE_URL = "https://api.naver.com"
    uri = "/keywordstool"
    method = "GET"
    headers = get_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID)
    params = {
        "hintKeywords": [keyword],
        "showDetail": "1"
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(BASE_URL + uri, headers=headers, params=params) as response:
            data = await response.json()
    if "keywordList" not in data:
        return pd.DataFrame()
    # head(100) is a no-op on shorter frames, so the old len() check was redundant.
    df = pd.DataFrame(data["keywordList"]).head(100)

    def parse_count(x):
        # Counts may arrive as "1,234" strings or "< 10" placeholders;
        # anything unparsable is treated as 0.
        try:
            return int(str(x).replace(",", ""))
        except (ValueError, TypeError):  # was a bare except: far too broad
            return 0

    df["PC์๊ฒ์๋"] = df["monthlyPcQcCnt"].apply(parse_count)
    df["๋ชจ๋ฐ์ผ์๊ฒ์๋"] = df["monthlyMobileQcCnt"].apply(parse_count)
    df["ํ ํ์๊ฒ์๋"] = df["PC์๊ฒ์๋"] + df["๋ชจ๋ฐ์ผ์๊ฒ์๋"]
    df.rename(columns={"relKeyword": "์ ๋ณดํค์๋"}, inplace=True)
    result_df = df[["์ ๋ณดํค์๋", "PC์๊ฒ์๋", "๋ชจ๋ฐ์ผ์๊ฒ์๋", "ํ ํ์๊ฒ์๋"]]
    debug_log("fetch_related_keywords ์๋ฃ")
    return result_df
# --- Blog document count via the Naver Open API (async) ---
async def fetch_blog_count(keyword):
    """Return Naver's total blog-document count for *keyword* (0 on error)."""
    debug_log(f"fetch_blog_count ํธ์ถ, ํค์๋: {keyword}")
    headers = {
        "X-Naver-Client-Id": os.environ["NAVER_SEARCH_CLIENT_ID"],
        "X-Naver-Client-Secret": os.environ["NAVER_SEARCH_CLIENT_SECRET"],
    }
    url = "https://openapi.naver.com/v1/search/blog.json"
    # display=1: only the total matters, not the hits themselves.
    params = {"query": keyword, "display": 1}
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers, params=params) as response:
            if response.status != 200:
                debug_log(f"fetch_blog_count ์ค๋ฅ, ์ํ์ฝ๋: {response.status}")
                return 0
            data = await response.json()
    total = data.get("total", 0)
    debug_log(f"fetch_blog_count ๊ฒฐ๊ณผ: {total}")
    return total
def create_excel_file(df):
    """Persist *df* to a temporary .xlsx file and return the file path.

    delete=False keeps the file around so Gradio can serve it for
    download after this function returns (OS temp-dir cleanup applies).
    """
    tmp = tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False)
    tmp.close()  # only the reserved name is needed; pandas reopens the path
    df.to_excel(tmp.name, index=False, engine='openpyxl')
    debug_log(f"Excel ํ์ผ ์์ฑ๋จ: {tmp.name}")
    return tmp.name
# --- Keyword search-volume lookup (async) ---
async def process_keyword(keywords: str, include_related: bool):
    """Look up search volumes and blog counts for newline-separated keywords.

    Each input keyword keeps its exact-match row from the keywordstool
    result (falling back to the top row when no exact match exists); the
    related keywords of the FIRST input are appended when
    include_related is True. Blog counts are fetched concurrently.
    Returns the DataFrame (sorted by total volume desc) and the path of
    its Excel export.
    """
    debug_log(f"process_keyword ํธ์ถ, ํค์๋๋ค: {keywords}, ์ฐ๊ด๊ฒ์์ด ํฌํจ: {include_related}")
    wanted = [line.strip() for line in keywords.splitlines() if line.strip()]
    frames = []
    for idx, kw in enumerate(wanted):
        df_kw = await fetch_related_keywords(kw)
        if df_kw.empty:
            continue
        exact = df_kw[df_kw["์ ๋ณดํค์๋"] == kw]
        frames.append(exact if not exact.empty else df_kw.head(1))
        # Related keywords are only expanded for the first input keyword.
        if include_related and idx == 0:
            related = df_kw[df_kw["์ ๋ณดํค์๋"] != kw]
            if not related.empty:
                frames.append(related)
    if frames:
        result_df = pd.concat(frames, ignore_index=True)
        result_df.drop_duplicates(subset=["์ ๋ณดํค์๋"], inplace=True)
    else:
        result_df = pd.DataFrame(columns=["์ ๋ณดํค์๋", "PC์๊ฒ์๋", "๋ชจ๋ฐ์ผ์๊ฒ์๋", "ํ ํ์๊ฒ์๋"])
    # One blog-count request per keyword, run concurrently.
    counts = await asyncio.gather(*(fetch_blog_count(kw) for kw in result_df["์ ๋ณดํค์๋"]))
    result_df["๋ธ๋ก๊ทธ๋ฌธ์์"] = counts
    result_df.sort_values(by="ํ ํ์๊ฒ์๋", ascending=False, inplace=True)
    debug_log("process_keyword ์๋ฃ")
    return result_df, create_excel_file(result_df)
# --- Morphological analysis (reference code-1, synchronous) ---
def analyze_text(text: str):
    """Tally noun frequencies in Korean *text* using MeCab.

    Everything except Hangul syllables is stripped first; tokens whose
    POS tag starts with "NN" (nouns) are counted. Returns (DataFrame
    sorted by frequency desc, path to its Excel export), or an empty
    DataFrame and "" when nothing survives the Hangul filter.
    """
    from collections import Counter  # local import keeps the module header untouched

    logging.basicConfig(level=logging.DEBUG)  # no-op if logging was already configured
    logger = logging.getLogger(__name__)
    logger.debug("์๋ณธ ํ ์คํธ: %s", text)
    # Keep only Hangul syllables (U+AC00-U+D7A3). The character class in the
    # previous revision was mojibake-corrupted and no longer matched Hangul.
    filtered_text = re.sub(r'[^가-힣]', '', text)
    logger.debug("ํํฐ๋ง๋ ํ ์คํธ: %s", filtered_text)
    if not filtered_text:
        logger.debug("์ ํจํ ํ๊ตญ์ด ํ ์คํธ๊ฐ ์์.")
        return pd.DataFrame(columns=["๋จ์ด", "๋น๋์"]), ""
    tokens = mecab.MeCab().pos(filtered_text)
    logger.debug("ํํ์ ๋ถ์ ๊ฒฐ๊ณผ: %s", tokens)
    # Counter replaces the manual dict tally; "NN*" tags are MeCab nouns.
    freq = Counter(
        word for word, pos in tokens
        if word and word.strip() and pos.startswith("NN")
    )
    sorted_freq = freq.most_common()  # same order as sort-by-count descending
    logger.debug("์ ๋ ฌ๋ ๋จ์ด ๋น๋: %s", sorted_freq)
    df = pd.DataFrame(sorted_freq, columns=["๋จ์ด", "๋น๋์"])
    logger.debug("ํํ์ ๋ถ์ DataFrame ์์ฑ๋จ, shape: %s", df.shape)
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
    temp_file.close()  # only the name is needed; pandas reopens the path
    df.to_excel(temp_file.name, index=False, engine='openpyxl')
    logger.debug("Excel ํ์ผ ์์ฑ๋จ: %s", temp_file.name)
    return df, temp_file.name
# --- Morphology results enriched with search volume / blog counts (async) ---
async def morphological_analysis_and_enrich(text: str, remove_freq1: bool):
    """Analyze *text* morphologically, then left-join volume data per word.

    When remove_freq1 is True, words appearing exactly once are dropped
    before enrichment. Returns the merged DataFrame and its Excel path
    ("" when the morphology pass found nothing).
    """
    debug_log("morphological_analysis_and_enrich ํจ์ ์์")
    df_freq, _ = analyze_text(text)
    if df_freq.empty:
        debug_log("ํํ์ ๋ถ์ ๊ฒฐ๊ณผ๊ฐ ๋น ๋ฐ์ดํฐํ๋ ์์ ๋๋ค.")
        return df_freq, ""
    if remove_freq1:
        before_shape = df_freq.shape
        df_freq = df_freq[df_freq["๋น๋์"] != 1]
        debug_log(f"๋น๋์ 1 ์ ๊ฑฐ ์ ์ฉ๋จ. {before_shape} -> {df_freq.shape}")
    keyword_block = "\n".join(df_freq["๋จ์ด"].tolist())
    debug_log(f"๋ถ์๋ ํค์๋: {keyword_block}")
    df_keyword_info, _ = await process_keyword(keyword_block, include_related=False)
    debug_log("๊ฒ์๋ ๋ฐ ๋ธ๋ก๊ทธ๋ฌธ์์ ์กฐํ ์๋ฃ")
    merged_df = df_freq.merge(df_keyword_info, left_on="๋จ์ด", right_on="์ ๋ณดํค์๋", how="left")
    merged_df.drop(columns=["์ ๋ณดํค์๋"], inplace=True)  # join key duplicates ๋จ์ด
    merged_excel_path = create_excel_file(merged_df)
    debug_log("morphological_analysis_and_enrich ํจ์ ์๋ฃ")
    return merged_df, merged_excel_path
# --- Direct keyword analysis (standalone, async) ---
async def direct_keyword_analysis(text: str, keyword_input: str):
    """Count occurrences of user-supplied keywords inside *text*.

    *keyword_input* is split on newlines/commas; each keyword yields one
    (keyword, count) row. Returns the DataFrame and its Excel path.

    Fixes over the previous revision: keywords absent from the text were
    appended twice (duplicate rows in the result), and an extra
    search-volume API call was issued whose results were never used.
    """
    debug_log("direct_keyword_analysis ํจ์ ์์")
    keywords = [kw.strip() for kw in re.split(r'[\n,]+', keyword_input) if kw.strip()]
    debug_log(f"์ ๋ ฅ๋ ํค์๋ ๋ชฉ๋ก: {keywords}")
    results = []
    for kw in keywords:
        count = text.count(kw)
        results.append((kw, count))
        debug_log(f"ํค์๋ '{kw}'์ ๋น๋์: {count}")
    df = pd.DataFrame(results, columns=["ํค์๋", "๋น๋์"])
    excel_path = create_excel_file(df)
    debug_log("direct_keyword_analysis ํจ์ ์๋ฃ")
    return df, excel_path
# --- Combined analysis: morphology + direct keywords (async) ---
async def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input: str):
    """Merge morphological analysis of *blog_text* with user-entered keywords.

    Words already found by the morphology pass are flagged in a marker
    column; direct keywords missing from that table get a new row with
    their raw text count plus search-volume / blog-count figures fetched
    on demand. Returns the DataFrame sorted by frequency (desc) and the
    path of its Excel export.
    """
    debug_log("combined_analysis ํจ์ ์์")
    merged_df, _ = await morphological_analysis_and_enrich(blog_text, remove_freq1)
    # Marker column distinguishing user-entered keywords from extracted ones.
    if "์ง์ ์ ๋ ฅ" not in merged_df.columns:
        merged_df["์ง์ ์ ๋ ฅ"] = ""
    # Direct keywords may be separated by newlines or commas.
    direct_keywords = re.split(r'[\n,]+', direct_keyword_input)
    direct_keywords = [kw.strip() for kw in direct_keywords if kw.strip()]
    debug_log(f"์ ๋ ฅ๋ ์ง์ ํค์๋: {direct_keywords}")
    for dk in direct_keywords:
        if dk in merged_df["๋จ์ด"].values:
            # Already present from the morphology pass: just flag the row.
            merged_df.loc[merged_df["๋จ์ด"] == dk, "์ง์ ์ ๋ ฅ"] = "์ง์ ์ ๋ ฅ"
        else:
            # Missing from the table: count it in the raw text and fetch
            # its volume data on demand (sequential, one API call per keyword).
            freq = blog_text.count(dk)
            df_direct, _ = await process_keyword(dk, include_related=False)
            if (not df_direct.empty) and (dk in df_direct["์ ๋ณดํค์๋"].values):
                row = df_direct[df_direct["์ ๋ณดํค์๋"] == dk].iloc[0]
                pc = row.get("PC์๊ฒ์๋", None)
                mobile = row.get("๋ชจ๋ฐ์ผ์๊ฒ์๋", None)
                total = row.get("ํ ํ์๊ฒ์๋", None)
                blog_count = row.get("๋ธ๋ก๊ทธ๋ฌธ์์", None)
            else:
                pc = mobile = total = blog_count = None
            new_row = {
                "๋จ์ด": dk,
                "๋น๋์": freq,
                "PC์๊ฒ์๋": pc,
                "๋ชจ๋ฐ์ผ์๊ฒ์๋": mobile,
                "ํ ํ์๊ฒ์๋": total,
                "๋ธ๋ก๊ทธ๋ฌธ์์": blog_count,
                "์ง์ ์ ๋ ฅ": "์ง์ ์ ๋ ฅ"
            }
            merged_df = pd.concat([merged_df, pd.DataFrame([new_row])], ignore_index=True)
    merged_df = merged_df.sort_values(by="๋น๋์", ascending=False).reset_index(drop=True)
    combined_excel = create_excel_file(merged_df)
    debug_log("combined_analysis ํจ์ ์๋ฃ")
    return merged_df, combined_excel
# --- Analysis dispatch (async) ---
async def analysis_handler(blog_text: str, remove_freq1: bool, direct_keyword_input: str, direct_keyword_only: bool):
    """Route the analyze-button click to the analysis mode the user picked."""
    debug_log("analysis_handler ํจ์ ์์")
    if direct_keyword_only:
        # Only count the user's own keywords; skip morphology entirely.
        return await direct_keyword_analysis(blog_text, direct_keyword_input)
    return await combined_analysis(blog_text, remove_freq1, direct_keyword_input)
# --- Scrape-button handler (async) ---
async def fetch_blog_content(url: str):
    """Thin wrapper around scrape_naver_blog for the Gradio click event."""
    debug_log("fetch_blog_content ํจ์ ์์")
    scraped = await scrape_naver_blog(url)
    debug_log("fetch_blog_content ํจ์ ์๋ฃ")
    return scraped
# --- Custom CSS ---
# Styling for the Gradio Blocks UI below; injected via gr.Blocks(css=...).
# The (garbled) Korean comments inside the literal are runtime data sent
# to the browser and are left untouched.
custom_css = """
/* ์ ์ฒด ์ปจํ ์ด๋ ์คํ์ผ */
.gradio-container {
    max-width: 960px;
    margin: auto;
    font-family: 'Helvetica Neue', Arial, sans-serif;
    background: #f5f7fa;
    padding: 2rem;
}
/* ํค๋ ์คํ์ผ */
.custom-header {
    text-align: center;
    font-size: 2.5rem;
    font-weight: bold;
    margin-bottom: 1.5rem;
    color: #333;
}
/* ๊ทธ๋ฃน ๋ฐ์ค ์คํ์ผ */
.custom-group {
    background: #ffffff;
    border-radius: 8px;
    padding: 1.5rem;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
    margin-bottom: 1.5rem;
}
/* ๋ฒํผ ์คํ์ผ */
.custom-button {
    background-color: #007bff;
    color: #fff;
    border: none;
    border-radius: 4px;
    padding: 0.6rem 1.2rem;
    font-size: 1rem;
    cursor: pointer;
    transition: background-color 0.3s;
}
.custom-button:hover {
    background-color: #0056b3;
}
/* ์ฒดํฌ๋ฐ์ค ์คํ์ผ */
.custom-checkbox {
    margin-right: 1rem;
    font-size: 1rem;
    font-weight: bold;
}
/* ๊ฒฐ๊ณผ ํ ์ด๋ธ ๋ฐ ๋ค์ด๋ก๋ ๋ฒํผ */
.custom-result {
    margin-top: 1.5rem;
}
/* ๊ฐ์ด๋ฐ ์ ๋ ฌ */
.centered {
    display: flex;
    justify-content: center;
    align-items: center;
}
/* ์ฌ์ฉ์ค๋ช ์คํ์ผ */
.usage-instructions {
    font-size: 1.1rem;
    line-height: 1.6;
    color: #555;
    background: #fff;
    padding: 1.5rem;
    border-radius: 8px;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
    margin-top: 2rem;
}
.usage-instructions h2 {
    font-size: 1.8rem;
    font-weight: bold;
    margin-bottom: 1rem;
    color: #333;
}
.usage-instructions ul {
    list-style: disc;
    margin-left: 2rem;
}
"""
# --- Gradio interface ---
# Layout: URL input + scrape button -> editable blog text -> analysis
# options -> analyze button -> results table + Excel download + usage notes.
with gr.Blocks(title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ํํ์ ๋ถ์ ์๋น์ค", css=custom_css) as demo:
    gr.HTML("<div class='custom-header'>๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ํํ์ ๋ถ์ ์๋น์ค ๐</div>")
    # Blog URL input and scrape trigger.
    with gr.Group(elem_classes="custom-group"):
        with gr.Row():
            blog_url_input = gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ", placeholder="์: https://blog.naver.com/ssboost/222983068507", lines=1)
        with gr.Row(elem_classes="centered"):
            scrape_button = gr.Button("์คํฌ๋ํ ์คํ", elem_classes="custom-button")
    # Scraped blog body; user-editable before analysis.
    with gr.Group(elem_classes="custom-group"):
        blog_content_box = gr.Textbox(label="๋ธ๋ก๊ทธ ๋ด์ฉ (์์ ๊ฐ๋ฅ)", lines=10, placeholder="์คํฌ๋ํ๋ ๋ธ๋ก๊ทธ ๋ด์ฉ์ด ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค.")
    # Analysis options: frequency-1 filter, direct-keyword-only mode, keyword list.
    with gr.Group(elem_classes="custom-group"):
        with gr.Row():
            remove_freq_checkbox = gr.Checkbox(label="๋น๋์1 ์ ๊ฑฐ", value=True, elem_classes="custom-checkbox")
        with gr.Row():
            direct_keyword_only_checkbox = gr.Checkbox(label="์ง์ ํค์๋ ์ ๋ ฅ๋ง ๋ถ์", value=False, elem_classes="custom-checkbox")
        with gr.Row():
            direct_keyword_box = gr.Textbox(label="์ง์ ํค์๋ ์ ๋ ฅ (์ํฐ ๋๋ ','๋ก ๊ตฌ๋ถ)", lines=2, placeholder="์: ํค์๋1, ํค์๋2\nํค์๋3")
    with gr.Group(elem_classes="custom-group"):
        with gr.Row(elem_classes="centered"):
            analyze_button = gr.Button("๋ถ์ ์คํ", elem_classes="custom-button")
    # Results: interactive table plus Excel download.
    with gr.Group(elem_classes="custom-group custom-result"):
        result_df = gr.Dataframe(label="ํตํฉ ๋ถ์ ๊ฒฐ๊ณผ (๋จ์ด, ๋น๋์, ๊ฒ์๋, ๋ธ๋ก๊ทธ๋ฌธ์์, ์ง์ ์ ๋ ฅ)", interactive=True)
    with gr.Group(elem_classes="custom-group"):
        excel_file = gr.File(label="Excel ๋ค์ด๋ก๋")
    # Static usage instructions (raw HTML; Korean text is runtime data).
    with gr.Group(elem_classes="custom-group"):
        usage_html = gr.HTML("""
        <div class="usage-instructions">
          <h2>์ฌ์ฉ ์ค๋ช ๐</h2>
          <ul>
            <li>๐ <strong>๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ</strong>: ๋ถ์ํ ๋ค์ด๋ฒ ๋ธ๋ก๊ทธ์ URL์ ์ ๋ ฅํ์ธ์.</li>
            <li>โ๏ธ <strong>์คํฌ๋ํ ์คํ</strong>: ๋งํฌ ์ ๋ ฅ ํ ๋ฒํผ์ ํด๋ฆญํ๋ฉด ๋ธ๋ก๊ทธ์ ์ ๋ชฉ๊ณผ ๋ณธ๋ฌธ์ด ์๋์ผ๋ก ๋ถ๋ฌ์์ง๋๋ค.</li>
            <li>๐ <strong>๋ธ๋ก๊ทธ ๋ด์ฉ (์์ ๊ฐ๋ฅ)</strong>: ๋ถ๋ฌ์จ ๋ธ๋ก๊ทธ ๋ด์ฉ์ด ํ์๋๋ฉฐ, ํ์์ ๋ฐ๋ผ ์ง์ ์์ ํ ์ ์์ต๋๋ค.</li>
            <li>โ๏ธ <strong>์ต์ ์ค์ </strong>:
              <ul>
                <li><em>๋น๋์1 ์ ๊ฑฐ</em>: ๊ธฐ๋ณธ ์ ํ๋์ด ์์ผ๋ฉฐ, ๋น๋์๊ฐ 1์ธ ๋จ์ด๋ ๊ฒฐ๊ณผ์์ ์ ์ธํฉ๋๋ค.</li>
                <li><em>์ง์ ํค์๋ ์ ๋ ฅ๋ง ๋ถ์</em>: ์ด ์ต์ ์ ์ ํํ๋ฉด, ๋ธ๋ก๊ทธ ๋ณธ๋ฌธ์์ ์ง์ ์ ๋ ฅํ ํค์๋๋ง ๋ถ์ํฉ๋๋ค.</li>
              </ul>
            </li>
            <li>๐ค <strong>์ง์ ํค์๋ ์ ๋ ฅ</strong>: ์ํฐ ๋๋ ์ผํ(,)๋ก ๊ตฌ๋ถํ์ฌ ๋ถ์ํ ํค์๋๋ฅผ ์ ๋ ฅํ์ธ์.</li>
            <li>๐ <strong>๋ถ์ ์คํ</strong>: ์ค์ ํ ์ต์ ์ ๋ฐ๋ผ ํํ์ ๋ถ์ ๋ฐ ํค์๋ ๋ถ์์ด ์ํ๋์ด ๊ฒฐ๊ณผ๊ฐ ํ์ Excel ํ์ผ๋ก ์ถ๋ ฅ๋ฉ๋๋ค.</li>
            <li>๐ฅ <strong>Excel ๋ค์ด๋ก๋</strong>: ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ Excel ํ์ผ๋ก ๋ค์ด๋ก๋ํ ์ ์์ต๋๋ค.</li>
          </ul>
          <p><strong>Tip:</strong> ๋ถ์ ๊ฒฐ๊ณผ๋ ์ค์๊ฐ์ผ๋ก ์ ๋ฐ์ดํธ๋๋ฉฐ, ํ์์ ์์ ํ ๋ค์ ๋ถ์ํ ์ ์์ต๋๋ค. ์ฆ๊ฑฐ์ด ๋ถ์ ๋์ธ์! ๐</p>
        </div>
        """)
    # Event wiring: Gradio accepts async handlers directly.
    scrape_button.click(fn=fetch_blog_content, inputs=blog_url_input, outputs=blog_content_box)
    analyze_button.click(fn=analysis_handler,
                         inputs=[blog_content_box, remove_freq_checkbox, direct_keyword_box, direct_keyword_only_checkbox],
                         outputs=[result_df, excel_file])
# Script entry point: launch the Gradio server when run directly (not on import).
if __name__ == "__main__":
    debug_log("Gradio ์ฑ ์คํ ์์")
    demo.launch()  # blocks here for the lifetime of the web server
    debug_log("Gradio ์ฑ ์คํ ์ข ๋ฃ")