Spaces:
Sleeping
Sleeping
File size: 2,703 Bytes
923dd57 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# app.py
import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast
import torch
import re
# Load the KoBART summarization tokenizer and model once at import time so
# every request reuses the same instances.
MODEL_CHECKPOINT = "gogamza/kobart-summarization"

tokenizer = PreTrainedTokenizerFast.from_pretrained(MODEL_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(MODEL_CHECKPOINT)
# Summarization function and its private helpers.
_REQUEST_TIMEOUT_SECONDS = 10
# Some sites reject the default python-requests User-Agent, so send a
# browser-like one.
_REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; news-summarizer)"}
_MIN_TEXT_CHARS = 30
_MIN_PARAGRAPH_CHARS = 40
_MAX_INPUT_TOKENS = 1024


def _normalize_whitespace(text):
    """Collapse every run of whitespace into one space and trim the ends."""
    return re.sub(r"\s+", " ", text).strip()


def _extract_article_text(soup):
    """Return the normalized article text found in a parsed HTML document.

    The first matching container wins: ``<article>``, then
    ``<div id="articleBody">``, then ``<div class="news_body">``. When none
    exists, ``<p>`` elements longer than 40 characters are joined instead.
    If the result is still shorter than 30 characters, the text of the
    whole page is used as a last resort.
    """
    container = (
        soup.find("article")
        or soup.find("div", id="articleBody")
        or soup.find("div", class_="news_body")
    )
    if container:
        text = container.get_text()
    else:
        stripped = (p.get_text().strip() for p in soup.find_all("p"))
        text = " ".join(p for p in stripped if len(p) > _MIN_PARAGRAPH_CHARS)

    # Normalize before measuring so a container holding mostly whitespace
    # does not block the whole-page fallback.
    text = _normalize_whitespace(text)
    if len(text) < _MIN_TEXT_CHARS:
        text = _normalize_whitespace(soup.get_text())
    return text


def summarize_news(url, min_len, max_len):
    """Fetch a news page and return a KoBART summary of its article text.

    Args:
        url: Address of the news article to download.
        min_len: Minimum summary length passed to ``model.generate``;
            converted with ``int`` and clamped so it never exceeds
            ``max_len``.
        max_len: Maximum summary length passed to ``model.generate``;
            converted with ``int``.

    Returns:
        The generated summary, or a user-facing Korean message when the URL
        is empty, the extracted text is too short, or any step fails. This
        function never raises: it is the UI callback, so every exception is
        turned into a message.
    """
    url = (url or "").strip()
    if not url:
        return "뉴스 URL을 입력해주세요."

    try:
        # NOTE(review): the URL is user-supplied and fetched as-is; consider
        # restricting target hosts if this runs where internal services are
        # reachable.
        res = requests.get(
            url,
            headers=_REQUEST_HEADERS,
            timeout=_REQUEST_TIMEOUT_SECONDS,  # never hang on a stalled server
        )
        # Do not summarize an HTTP error page as if it were the article.
        res.raise_for_status()

        # Parse the raw bytes so BeautifulSoup can detect the encoding from
        # the document itself; only trust the HTTP header when it actually
        # declares a charset (requests otherwise assumes ISO-8859-1 for
        # text responses, which garbles Korean pages).
        content_type = res.headers.get("content-type", "").lower()
        declared_encoding = res.encoding if "charset" in content_type else None
        soup = BeautifulSoup(
            res.content, "html.parser", from_encoding=declared_encoding
        )

        text = _extract_article_text(soup)
        if len(text) < _MIN_TEXT_CHARS:
            return "본문이 너무 짧거나 추출에 실패했습니다. 다른 뉴스 URL을 시도해보세요."

        max_length = int(max_len)
        # The two UI sliders overlap, so the minimum can be set above the
        # maximum; clamp it to keep the generation settings consistent.
        min_length = min(int(min_len), max_length)

        input_ids = tokenizer.encode(
            text,
            return_tensors="pt",
            max_length=_MAX_INPUT_TOKENS,
            truncation=True,
        )
        summary_ids = model.generate(
            input_ids,
            max_length=max_length,
            min_length=min_length,
            num_beams=4,
            early_stopping=True,
            length_penalty=1.2,
            no_repeat_ngram_size=3,
            repetition_penalty=1.5,
        )
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        return f"오류 발생: {e}"
# Gradio UI: a URL box with a submit button, two length sliders, and a
# read-out for the generated summary.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📰 뉴스 요약기 (KoBART 기반)")
    gr.Markdown("뉴스 기사 URL을 입력하면 AI가 요약해줍니다.")

    # First row: article address and the trigger button.
    with gr.Row():
        url_input = gr.Textbox(
            label="뉴스 URL",
            placeholder="https://news.naver.com/article/...",
            lines=1,
        )
        submit_btn = gr.Button(value="요약하기")

    # Second row: bounds for the summary length.
    with gr.Row():
        min_len = gr.Slider(
            minimum=20, maximum=200, value=50, step=10, label="최소 길이"
        )
        max_len = gr.Slider(
            minimum=50, maximum=400, value=150, step=10, label="최대 길이"
        )

    output = gr.Textbox(label="요약 결과", lines=10)

    # Clicking the button runs the summarizer with the three inputs and
    # writes its return value into the output box.
    submit_btn.click(
        fn=summarize_news,
        inputs=[url_input, min_len, max_len],
        outputs=output,
    )

# Launch the app when this file is run as a script (the entry point used on
# Hugging Face Spaces).
if __name__ == "__main__":
    demo.launch()
|