Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from newspaper import Article | |
| from newspaper import Config | |
| from transformers import pipeline | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import re | |
| from bs4 import BeautifulSoup as bs | |
| import requests | |
| from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration | |
# Summarization checkpoint shared by the tokenizer and the model.
_SUMMARY_CHECKPOINT = "ainize/kobart-news"
# Cap on encoder input tokens; the effective cap is the smaller of this and
# the model config's ``max_position_embeddings``.
_MAX_INPUT_TOKENS = 1024
# Holds the loaded (tokenizer, model) pair so the checkpoint is read only once.
_summarizer_cache = {}


def _load_summarizer():
    """Return the ``(tokenizer, model)`` pair, loading it on first use only."""
    if "pair" not in _summarizer_cache:
        # Load both pieces before storing, so a failure half-way through
        # never leaves a partially filled cache behind.
        tokenizer = PreTrainedTokenizerFast.from_pretrained(_SUMMARY_CHECKPOINT)
        summary_model = BartForConditionalGeneration.from_pretrained(_SUMMARY_CHECKPOINT)
        _summarizer_cache["pair"] = (tokenizer, summary_model)
    return _summarizer_cache["pair"]


def get_summary(input_text):
    """Summarize *input_text* with the ``ainize/kobart-news`` BART model.

    Args:
        input_text: Article text to summarize.

    Returns:
        The decoded summary with special tokens removed, or ``""`` when
        *input_text* is empty or contains only whitespace.
    """
    # Nothing to summarize: skip model loading and generation entirely.
    if not input_text or not input_text.strip():
        return ""

    tokenizer, summary_model = _load_summarizer()

    # Truncate long articles so the encoder never receives more positions
    # than the model was configured for.
    max_input_tokens = min(
        _MAX_INPUT_TOKENS, summary_model.config.max_position_embeddings
    )
    input_ids = tokenizer.encode(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_tokens,
    )

    summary_text_ids = summary_model.generate(
        input_ids=input_ids,
        bos_token_id=summary_model.config.bos_token_id,
        eos_token_id=summary_model.config.eos_token_id,
        length_penalty=2.0,
        max_length=142,
        min_length=56,
        num_beams=4,
    )
    return tokenizer.decode(summary_text_ids[0], skip_special_tokens=True)
# Browser-like User-Agent string stored on the newspaper configuration below.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

# Module-level newspaper configuration: request timeout of 10 (presumably
# seconds — confirm against the newspaper docs) plus the User-Agent above.
config = Config()
config.request_timeout = 10
config.browser_user_agent = USER_AGENT
class news_collector:
    """Collect article links from a Daum news listing and summarize them.

    Summaries are accumulated in ``self.examples`` by
    ``update_news_examples()``.
    """

    def __init__(self):
        # One summary string per successfully processed article.
        self.examples = []

    def get_new_parser(self, url):
        """Download and parse the article at *url*.

        Args:
            url: Address of the article page.

        Returns:
            The ``Article`` object after ``download()`` and ``parse()``.
        """
        # Pass the module-level config so its User-Agent and timeout are
        # actually used for the download.
        article = Article(url, language='ko', config=config)
        article.download()
        article.parse()
        return article

    def get_news_links(self, page=''):
        """Return the ``https`` article links found on the listing page.

        Args:
            page: Currently ignored; kept for backward compatibility.

        Returns:
            A list of ``href`` values, in page order, taken from
            ``a.link_txt`` anchors and starting with ``https``.

        Raises:
            requests.RequestException: If the listing cannot be fetched or
                the server answers with an error status.
        """
        url = "https://news.daum.net/breakingnews/economic"
        response = requests.get(
            url,
            headers={"User-Agent": USER_AGENT},
            # Without a timeout a stalled connection would hang forever.
            timeout=config.request_timeout,
        )
        response.raise_for_status()

        soup = bs(response.text, 'html.parser')
        # ``.get`` yields None for anchors without an href instead of raising.
        hrefs = (anchor.get('href') for anchor in soup.select("a.link_txt"))
        return [href for href in hrefs if href and href.startswith('https')]

    def update_news_examples(self):
        """Summarize each listed article and append it to ``self.examples``.

        Collection is best effort: a failure on the listing or on a single
        article is logged and skipped rather than raised.
        """
        import logging

        logger = logging.getLogger(__name__)

        try:
            news_links = self.get_news_links()
        except requests.RequestException:
            logger.exception("Could not fetch the news listing; no examples added")
            return

        for news_url in news_links:
            try:
                article = self.get_new_parser(news_url)
                text = article.text
                if not text or not text.strip():
                    # Nothing was extracted from the page; nothing to summarize.
                    continue
                summary = get_summary(text)
            except Exception:
                # Pages and model inputs vary widely; one bad article must
                # not discard the summaries already collected.
                logger.exception("Skipping article %s", news_url)
                continue
            if summary:
                self.examples.append(summary)
# Page title (not referenced again in this part of the file).
title = "๊ท ํ์กํ ๋ด์ค ์ฝ๊ธฐ (Balanced News Reading)"

# Markdown introduction rendered at the top of the page.
intro_markdown = """
# ๊ท ํ์กํ ๋ด์ค ์ฝ๊ธฐ (Balanced News Reading)
๊ธ์ ์ ์ธ ๊ธฐ์ฌ์ ๋ถ์ ์ ์ธ ๊ธฐ์ฌ์ธ์ง ํ์ธํ์ฌ ๋ด์ค๋ฅผ ์ฝ์ ์ ์์ต๋๋ค. ์ต๊ทผ ๊ฒฝ์ ๋ด์ค๊ธฐ์ฌ๋ฅผ ๊ฐ์ ธ์ Example์์ ๋ฐ๋ก ํ์ธํ ์ ์๋๋ก ๊ตฌ์ฑํ์ต๋๋ค.
## ์ฌ์ฉ๋ฐฉ๋ฒ
Daum๋ด์ค์ ๊ฒฝ์ ๊ธฐ์ฌ๋ฅผ ๊ฐ์ ธ์ ๋ด์ฉ์ ์์ฝํ๊ณ `Example`์ ๊ฐ์ ธ์ต๋๋ค. ๊ฐ์ ๋ถ์์ ํ๊ณ ์ถ์ ๊ธฐ์ฌ๋ฅผ `Examples`์์ ์ ํํด์ `Submit`์ ๋๋ฅด๋ฉด `Classification`์
ํด๋น ๊ธฐ์ฌ์ ๊ฐ์ ํ๊ฐ ๊ฒฐ๊ณผ๊ฐ ํ์๋ฉ๋๋ค.
๊ฐ์ ํ๊ฐ๋ ๊ฐ ์ํ์ ํ๋ฅ ์ ๋ณด๊ฐ `neutral`, `positive`, `negative` 3๊ฐ์ง๋ก ํํ๋ฉ๋๋ค.
## ๊ตฌ์กฐ ์ค๋ช
๋ด์ค๊ธฐ์ฌ๋ฅผ ํฌ๋กค๋ง ๋ฐ ์์ฝ ๋ชจ๋ธ์ ์ด์ฉํ ๊ธฐ์ฌ ์์ฝ -> ๊ธฐ์ฌ ์์ฝ์ ๋ณด Example์ ์ถ๊ฐ -> ํ๊ตญ์ด fine-tunningํ ๊ฐ์ ํ๊ฐ ๋ชจ๋ธ์ ์ด์ฉํ ๊ฐ์ ใ ใ ๊ฐ
"""

with gr.Blocks() as demo:
    news = news_collector()
    gr.Markdown(intro_markdown)

    # Fill news.examples with article summaries before the classifier
    # interface is created, since they are passed to it as examples.
    news.update_news_examples()

    article_input = gr.Textbox(placeholder="๋ด์ค ๊ธฐ์ฌ ๋ด์ฉ์ ์ ๋ ฅํ์ธ์.")
    gr.load(
        "models/gabrielyang/finance_news_classifier-KR_v7",
        inputs=article_input,
        examples=news.examples,
    )

if __name__ == "__main__":
    demo.launch()