File size: 1,537 Bytes
0999206
 
 
 
 
 
 
 
 
 
 
 
 
 
e70f4f1
0999206
 
 
 
 
 
 
5ffbbea
0999206
 
dda7919
0999206
 
 
e70f4f1
058db1f
0999206
e62ede8
e70f4f1
0999206
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from newspaper import Article
from newspaper import Config
import gradio as gr
from transformers import MBartForConditionalGeneration
from transformers import AutoTokenizer

# Identifier of the pretrained checkpoint passed to from_pretrained below.
model_name = "haotieu/vietnamese-summarization"
# Loaded once at module import so every call to news_summarizer reuses the
# same tokenizer and model objects instead of reloading them per request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name)

def extract_article_text(url):
  """Download the page at ``url`` and return its extracted article text.

  The request is made through a newspaper ``Config`` that carries a desktop
  Firefox User-Agent string and a request timeout of 20.

  Args:
    url: Address of the news article to fetch.

  Returns:
    The ``text`` attribute of the downloaded and parsed ``Article``.
  """
  newspaper_config = Config()
  newspaper_config.browser_user_agent = (
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) '
      'Gecko/20100101 Firefox/78.0'
  )
  newspaper_config.request_timeout = 20

  page = Article(url, config=newspaper_config)
  page.download()
  page.parse()
  return page.text
 
def news_summarizer(url):
  """Fetch the article at ``url`` and return a generated summary of its text.

  Args:
    url: Address of the news article to summarize.

  Returns:
    The decoded summary string, with special tokens removed.

  Raises:
    ValueError: If no article text could be extracted from ``url``.
  """
  text = extract_article_text(url)
  # Without any text the model would still emit a "summary" generated from
  # special tokens alone; fail loudly instead of returning invented content.
  if not text or not text.strip():
    raise ValueError(f'No article text could be extracted from: {url!r}')

  # A single sequence needs no padding; truncation caps the encoder input at
  # 512 tokens. Calling the tokenizer directly returns the attention mask
  # alongside the input ids.
  encoded = tokenizer(text, return_tensors='pt', max_length=512, truncation=True)
  summary_ids = model.generate(
      encoded['input_ids'],
      # Pass the mask explicitly rather than making generate() infer it.
      attention_mask=encoded['attention_mask'],
      max_length=128,
      min_length=64,
  )
  # Exactly one input sequence was encoded, so only the first output is used.
  return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
  
# Example article URL offered in the interface's examples list.
sample_url = 'https://vnexpress.net/them-hai-nuoc-rut-nhan-vien-su-quan-tai-ukraine-4420581.html'
# Description text shown with the interface.
desc = 'This app uses BARTpho model by VinAI to summarize the text of a news article.'

# Text-in / text-out Gradio interface that calls news_summarizer.
summarizer_interface = gr.Interface(
  fn=news_summarizer,
  inputs="text",
  outputs="text",
  title="vietnamese news summarizer",
  description=desc,
  examples=[sample_url],
  theme='huggingface',
)
summarizer_interface.launch(inline=False)