# Spaces: Configuration error
# app.py
import logging
import re

import gradio as gr
import requests
import torch
from bs4 import BeautifulSoup
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast
# Model loading: the tokenizer and the model are both created from the same
# pretrained checkpoint id, once, when the module is imported.
_CHECKPOINT = "gogamza/kobart-summarization"
tokenizer = PreTrainedTokenizerFast.from_pretrained(_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(_CHECKPOINT)
# Summarization
_logger = logging.getLogger(__name__)

# Seconds to wait for the news site before giving up, so a stalled server
# cannot block the request handler indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10
# Extracted text shorter than this is treated as a failed extraction.
_MIN_ARTICLE_CHARS = 30
# <p> elements at or below this length are skipped by the paragraph fallback.
_MIN_PARAGRAPH_CHARS = 40


def _extract_article_text(soup):
    """Return the raw (un-normalized) article text found in *soup*.

    Candidates are tried in order: an ``<article>`` element, a
    ``div#articleBody`` or ``div.news_body`` container, the long ``<p>``
    elements joined together, and finally the text of the whole page.
    """
    # Script/style contents are not prose and would otherwise be returned by
    # get_text() and fed to the summarizer.
    for tag in soup.find_all(["script", "style", "noscript"]):
        tag.decompose()

    container = (
        soup.find("article")
        or soup.find("div", id="articleBody")
        or soup.find("div", class_="news_body")
    )
    if container:
        return container.get_text()

    paragraphs = (p.get_text().strip() for p in soup.find_all("p"))
    text = " ".join(p for p in paragraphs if len(p) > _MIN_PARAGRAPH_CHARS)
    if len(text) < _MIN_ARTICLE_CHARS:
        text = soup.get_text()
    return text


def summarize_news(url, min_len, max_len):
    """Fetch a news article from *url* and return a generated summary.

    Args:
        url: Address of the article page; must start with ``http://`` or
            ``https://``.
        min_len: Minimum summary length passed to ``model.generate``.
        max_len: Maximum summary length passed to ``model.generate``.

    Returns:
        The summary text, or a user-facing message when the URL is invalid,
        the article text could not be extracted, or any error occurred.
        This function never raises; failures are logged and reported as text
        because the return value is displayed directly in the UI.
    """
    url = (url or "").strip()
    # NOTE: the URL is user-supplied and fetched server-side, so only plain
    # web schemes are accepted.
    if not url.startswith(("http://", "https://")):
        return "올바른 뉴스 URL을 입력해주세요. (http:// 또는 https:// 로 시작해야 합니다.)"

    try:
        res = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        # Do not summarize 4xx/5xx error pages as if they were articles.
        res.raise_for_status()

        # Parse the raw bytes so BeautifulSoup can honor a <meta> charset.
        # res.text falls back to ISO-8859-1 when the server sends no charset,
        # which garbles Korean pages. A charset that IS present in the HTTP
        # header is passed through explicitly.
        content_type = res.headers.get("Content-Type", "").lower()
        declared_encoding = res.encoding if "charset" in content_type else None
        soup = BeautifulSoup(
            res.content, "html.parser", from_encoding=declared_encoding
        )

        # Collapse every whitespace run (including newlines/tabs) to one space.
        text = re.sub(r"\s+", " ", _extract_article_text(soup)).strip()
        if len(text) < _MIN_ARTICLE_CHARS:
            return "본문이 너무 짧거나 추출에 실패했습니다. 다른 뉴스 URL을 시도해보세요."

        # The two length inputs are independent, so the minimum may exceed
        # the maximum; clamp it to keep the length constraints feasible.
        max_tokens = int(max_len)
        min_tokens = min(int(min_len), max_tokens)

        input_ids = tokenizer.encode(
            text, return_tensors="pt", max_length=1024, truncation=True
        )
        summary_ids = model.generate(
            input_ids,
            max_length=max_tokens,
            min_length=min_tokens,
            num_beams=4,
            early_stopping=True,
            length_penalty=1.2,
            no_repeat_ngram_size=3,
            repetition_penalty=1.5,
        )
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the logs and show the
        # user a short message instead of crashing the handler.
        _logger.exception("Failed to summarize %s", url)
        return f"오류 발생: {e}"
# Gradio UI: heading and description, a row with the URL box and the submit
# button, a row with the two summary-length sliders, then the result box.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(value="## 📰 뉴스 요약기 (KoBART 기반)")
    gr.Markdown(value="뉴스 기사 URL을 입력하면 AI가 요약해줍니다.")

    with gr.Row():
        url_input = gr.Textbox(
            label="뉴스 URL",
            placeholder="https://news.naver.com/article/...",
            lines=1,
        )
        submit_btn = gr.Button(value="요약하기")

    with gr.Row():
        min_len = gr.Slider(
            minimum=20, maximum=200, value=50, step=10, label="최소 길이"
        )
        max_len = gr.Slider(
            minimum=50, maximum=400, value=150, step=10, label="최대 길이"
        )

    output = gr.Textbox(label="요약 결과", lines=10)

    # Clicking the button runs the summarizer on the URL and both lengths
    # and writes the returned text into the result box.
    submit_btn.click(
        fn=summarize_news,
        inputs=[url_input, min_len, max_len],
        outputs=output,
    )

# On Hugging Face Spaces the app is started this way: launch the UI when this
# file is executed as a script.
if __name__ == "__main__":
    demo.launch()