Spaces:

Oriner
/

NewSummarize

Sleeping

App Files Files Community

Oriner committed on 16 days ago

Commit

923dd57

1 Parent(s): bf0377a

Add application file

Browse files

Files changed (2) hide show

app.py +77 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+# app.py
+import gradio as gr
+import requests
+from bs4 import BeautifulSoup
+from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast
+import torch
+import re
+# 모델 로딩
+tokenizer = PreTrainedTokenizerFast.from_pretrained("gogamza/kobart-summarization")
+model = BartForConditionalGeneration.from_pretrained("gogamza/kobart-summarization")
+# 요약 함수
+def summarize_news(url, min_len, max_len):
+    try:
+        res = requests.get(url)
+        soup = BeautifulSoup(res.text, "html.parser")
+        article = soup.find("article")
+        if article:
+            text = article.get_text()
+        else:
+            body = soup.find("div", id="articleBody") or soup.find("div", class_="news_body")
+            if body:
+                text = body.get_text()
+            else:
+                paragraphs = [p.get_text() for p in soup.find_all("p")]
+                paragraphs = [p.strip() for p in paragraphs if len(p.strip()) > 40]
+                text = " ".join(paragraphs)
+                if len(text) < 30:
+                    text = soup.get_text()
+        text = re.sub(r'[\r\n\t]+', ' ', text)
+        text = re.sub(r'\s+', ' ', text).strip()
+        if len(text) < 30:
+            return "본문이 너무 짧거나 추출에 실패했습니다. 다른 뉴스 URL을 시도해보세요."
+        input_ids = tokenizer.encode(text, return_tensors="pt", max_length=1024, truncation=True)
+        summary_ids = model.generate(
+            input_ids,
+            max_length=int(max_len),
+            min_length=int(min_len),
+            num_beams=4,
+            early_stopping=True,
+            length_penalty=1.2,
+            no_repeat_ngram_size=3,
+            repetition_penalty=1.5
+        )
+        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+        return summary
+    except Exception as e:
+        return f"오류 발생: {e}"
+# Gradio UI
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## 📰 뉴스 요약기 (KoBART 기반)")
+    gr.Markdown("뉴스 기사 URL을 입력하면 AI가 요약해줍니다.")
+    with gr.Row():
+        url_input = gr.Textbox(label="뉴스 URL", placeholder="https://news.naver.com/article/...", lines=1)
+        submit_btn = gr.Button("요약하기")
+    with gr.Row():
+        min_len = gr.Slider(20, 200, value=50, step=10, label="최소 길이")
+        max_len = gr.Slider(50, 400, value=150, step=10, label="최대 길이")
+    output = gr.Textbox(label="요약 결과", lines=10)
+    submit_btn.click(fn=summarize_news, inputs=[url_input, min_len, max_len], outputs=output)
+# ✅ On Hugging Face Spaces, launch the app like this
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+transformers
+gradio
+torch
+requests
+beautifulsoup4