JohnKouf committed on
Commit
da39e44
·
verified ·
1 Parent(s): e417927

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -52
app.py CHANGED
@@ -1,61 +1,35 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
- import re
4
- from transformers import AutoTokenizer
5
- from transformers import AutoModelForSeq2SeqLM
6
-
7
 
8
  # Load the model and tokenizer
9
- tokenizer = AutoTokenizer.from_pretrained("kriton/greek-text-summarization")
10
- model = AutoModelForSeq2SeqLM.from_pretrained("kriton/greek-text-summarization")
11
- generator = pipeline("summarization", model="kriton/greek-text-summarization")
12
-
13
-
14
-
15
- #remove incomplete sentences from the output
16
- def remove_incomplete_sentence(text):
17
- sentence_endings = r'[.!?;]'
18
-
19
- sentences = re.split(r'(?<=[.!?;])\s+', text.strip())
20
-
21
- if len(sentences) == 0 or len(sentences) == 1:
22
- return text.strip()
23
-
24
- if re.match(f'.*[{sentence_endings}]$', sentences[-1]):
25
- return text.strip()
26
- return ' '.join(sentences[:-1]).strip()
27
-
28
- # Define the summary generation function
29
- def genarate_summary(article):
30
- inputs = tokenizer(
31
- 'summarize: ' + article,
32
- return_tensors="pt",
33
- max_length=1024,
34
- truncation=True,
35
- padding="max_length",
36
- )
37
-
38
- outputs = model.generate(
39
- inputs["input_ids"],
40
- max_length=1024,
41
- min_length=130,
42
- length_penalty=3.0,
43
- num_beams=8,
44
- early_stopping=True,
45
- repetition_penalty=3.0,
46
- no_repeat_ngram_size=3
47
- )
48
 
49
- return remove_incomplete_sentence(tokenizer.decode(outputs[0], skip_special_tokens=True))
 
 
 
50
 
51
- # Set up Gradio Interface
52
  iface = gr.Interface(
53
- fn=genarate_summary,
54
- inputs="text",
55
- outputs="text",
56
- title="Greek Text Summarizer",
57
- description="Enter an article in Greek, and this tool will generate a summary."
 
58
  )
59
 
60
- # Launch the Gradio Interface
61
  iface.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
 
 
 
 
3
 
4
  # Load the model and tokenizer
5
+ model_name = 'IMISLab/GreekT5-umt5-base-greeksum'
6
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
7
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
8
+
9
+ # Set up the summarizer pipeline
10
+ summarizer = pipeline(
11
+ 'summarization',
12
+ model=model,
13
+ tokenizer=tokenizer,
14
+ device=-1, # -1 for CPU; set to 0 for GPU if available
15
+ max_new_tokens=128,
16
+ truncation=True
17
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ # Define the summarization function
20
+ def summarize_text(text):
21
+ output = summarizer('summarize: ' + text)
22
+ return output[0]['summary_text']
23
 
24
+ # Create a Gradio interface
25
  iface = gr.Interface(
26
+ fn=summarize_text, # Function to run
27
+ inputs=gr.Textbox(label="Enter Greek Text", placeholder="Type or paste your text here..."), # Input component
28
+ outputs=gr.Textbox(label="Summary", interactive=True), # Output component
29
+ title="Greek Text Summarization", # Title for the UI
30
+ description="This app uses a pre-trained Greek summarization model to generate a brief summary of your input text.", # Description
31
+ allow_flagging="never" # Optional: Disable flagging feature
32
  )
33
 
34
+ # Launch the interface
35
  iface.launch()