Oriner committed on
Commit
923dd57
·
1 Parent(s): bf0377a

Add application file

Browse files
Files changed (2) hide show
  1. app.py +77 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import gradio as gr
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+ from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast
7
+ import torch
8
+ import re
9
+
# Load the summarization checkpoint once at import time; summarize_news()
# reads these module-level `tokenizer` and `model` objects on every call.
_CHECKPOINT = "gogamza/kobart-summarization"
tokenizer = PreTrainedTokenizerFast.from_pretrained(_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(_CHECKPOINT)
13
+
# --- Summarization -----------------------------------------------------------

# Upper bound (seconds) on the article download so a slow or unresponsive
# host cannot block the handler indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10

# Browser-like User-Agent; some sites refuse the default client identifier.
_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; NewsSummarizer/1.0)",
}

# Extracted text shorter than this is treated as a failed extraction.
_MIN_TEXT_CHARS = 30

# <p> elements at or below this length are skipped in the paragraph fallback
# (typically captions, bylines, navigation snippets).
_MIN_PARAGRAPH_CHARS = 40


def _normalize_whitespace(text):
    """Collapse every run of whitespace into one space and trim the ends."""
    return re.sub(r"\s+", " ", text).strip()


def _extract_article_text(soup: "BeautifulSoup") -> str:
    """Return the best article-body candidate from a parsed page.

    Strategies are tried from most to least specific: an ``<article>``
    element, ``<div id="articleBody">``, ``<div class="news_body">``, the
    concatenation of all sufficiently long ``<p>`` elements, and finally the
    text of the whole page. The first candidate whose normalized text has at
    least ``_MIN_TEXT_CHARS`` characters wins; an empty string is returned
    when none qualifies.
    """
    # Script/style payloads are not prose but would otherwise leak into
    # get_text(), especially in the whole-page fallback.
    for tag in soup.find_all(["script", "style", "noscript"]):
        tag.decompose()

    containers = (
        soup.find("article"),
        soup.find("div", id="articleBody"),
        soup.find("div", class_="news_body"),
    )
    candidates = [c.get_text() for c in containers if c is not None]

    paragraphs = (p.get_text().strip() for p in soup.find_all("p"))
    candidates.append(
        " ".join(p for p in paragraphs if len(p) > _MIN_PARAGRAPH_CHARS)
    )
    candidates.append(soup.get_text())

    for raw in candidates:
        text = _normalize_whitespace(raw)
        if len(text) >= _MIN_TEXT_CHARS:
            return text
    return ""


def summarize_news(url, min_len, max_len) -> str:
    """Download a news article and return a generated summary of its body.

    Args:
        url: Address of the article page; must start with ``http://`` or
            ``https://``.
        min_len: Minimum summary length passed to ``model.generate`` (cast to
            ``int``). Clamped so it never exceeds ``max_len``.
        max_len: Maximum summary length passed to ``model.generate`` (cast to
            ``int``).

    Returns:
        The decoded summary, or a user-facing message string when the URL is
        missing/unsupported, the body cannot be extracted, or any error
        occurs. This function is a UI callback, so failures are reported as
        text instead of being raised.
    """
    url = (url or "").strip()
    if not url:
        return "뉴스 URL을 입력해주세요."
    if not url.lower().startswith(("http://", "https://")):
        return "http:// 또는 https:// 로 시작하는 URL을 입력해주세요."

    try:
        res = requests.get(
            url,
            headers=_REQUEST_HEADERS,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        # Do not summarize 4xx/5xx error pages.
        res.raise_for_status()

        # Parse the raw bytes rather than res.text: without a charset in the
        # Content-Type header, res.text is decoded as ISO-8859-1 and garbles
        # Korean pages. A charset the server did declare is passed along;
        # otherwise BeautifulSoup detects the encoding from the document.
        content_type = res.headers.get("Content-Type", "")
        declared = res.encoding if "charset" in content_type.lower() else None
        soup = BeautifulSoup(res.content, "html.parser", from_encoding=declared)

        text = _extract_article_text(soup)
        if len(text) < _MIN_TEXT_CHARS:
            return "본문이 너무 짧거나 추출에 실패했습니다. 다른 뉴스 URL을 시도해보세요."

        # The two sliders are independent, so the requested minimum can be
        # larger than the maximum; clamp to keep the constraints feasible.
        max_tokens = int(max_len)
        min_tokens = min(int(min_len), max_tokens)

        input_ids = tokenizer.encode(
            text,
            return_tensors="pt",
            max_length=1024,
            truncation=True,
        )
        summary_ids = model.generate(
            input_ids,
            max_length=max_tokens,
            min_length=min_tokens,
            num_beams=4,
            early_stopping=True,
            length_penalty=1.2,
            no_repeat_ngram_size=3,
            repetition_penalty=1.5,
        )
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    except Exception as e:
        # UI boundary: surface every failure (network, parsing, generation)
        # as text in the output box instead of crashing the handler.
        return f"오류 발생: {e}"
57
+
# Gradio UI: a URL box with a submit button, two summary-length sliders,
# and a text box that shows the result of summarize_news().
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📰 뉴스 요약기 (KoBART 기반)")
    gr.Markdown("뉴스 기사 URL을 입력하면 AI가 요약해줍니다.")

    with gr.Row():
        url_input = gr.Textbox(
            label="뉴스 URL",
            placeholder="https://news.naver.com/article/...",
            lines=1,
        )
        submit_btn = gr.Button("요약하기")

    with gr.Row():
        min_len = gr.Slider(
            minimum=20,
            maximum=200,
            value=50,
            step=10,
            label="최소 길이",
        )
        max_len = gr.Slider(
            minimum=50,
            maximum=400,
            value=150,
            step=10,
            label="최대 길이",
        )

    output = gr.Textbox(label="요약 결과", lines=10)

    # Clicking the button sends the URL and both slider values to
    # summarize_news and writes its return value into the output box.
    submit_btn.click(
        fn=summarize_news,
        inputs=[url_input, min_len, max_len],
        outputs=output,
    )

# Entry point: this is how the app is started on Hugging Face Spaces.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers
2
+ gradio
3
+ torch
4
+ requests
5
+ beautifulsoup4