Spaces:

JohnKouf
/

greek_text_summarization

Runtime error

App Files Files Community

JohnKouf committed on Nov 18, 2024

Commit

da39e44

verified ·

1 Parent(s): e417927

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -52

app.py CHANGED Viewed

@@ -1,61 +1,35 @@
 import gradio as gr
-from transformers import pipeline
-import re
-from transformers import AutoTokenizer
-from transformers import AutoModelForSeq2SeqLM
 # Load the model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("kriton/greek-text-summarization")
-model = AutoModelForSeq2SeqLM.from_pretrained("kriton/greek-text-summarization")
-generator = pipeline("summarization", model="kriton/greek-text-summarization")
-#remove incomplete sentences from the output
-def remove_incomplete_sentence(text):
-    sentence_endings = r'[.!?;]'
-    sentences = re.split(r'(?<=[.!?;])\s+', text.strip())
-    if len(sentences) == 0 or len(sentences) == 1:
-        return text.strip()
-    if re.match(f'.*[{sentence_endings}]$', sentences[-1]):
-        return text.strip()
-    return ' '.join(sentences[:-1]).strip()
-# Define the summary generation function
-def genarate_summary(article):
-    inputs = tokenizer(
-        'summarize: ' + article,
-        return_tensors="pt",
-        max_length=1024,
-        truncation=True,
-        padding="max_length",
-    )
-    outputs = model.generate(
-        inputs["input_ids"],
-        max_length=1024,
-        min_length=130,
-        length_penalty=3.0,
-        num_beams=8,
-        early_stopping=True,
-        repetition_penalty=3.0,
-        no_repeat_ngram_size=3
-    )
-    return remove_incomplete_sentence(tokenizer.decode(outputs[0], skip_special_tokens=True))
-# Set up Gradio Interface
 iface = gr.Interface(
-    fn=genarate_summary,
-    inputs="text",
-    outputs="text",
-    title="Greek Text Summarizer",
-    description="Enter an article in Greek, and this tool will generate a summary."
 )
-# Launch the Gradio Interface
 iface.launch()

 import gradio as gr
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
 # Load the model and tokenizer
+model_name = 'IMISLab/GreekT5-umt5-base-greeksum'
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Set up the summarizer pipeline
+summarizer = pipeline(
+    'summarization',
+    model=model,
+    tokenizer=tokenizer,
+    device=-1,  # -1 for CPU; set to 0 for GPU if available
+    max_new_tokens=128,
+    truncation=True
+)
+# Define the summarization function
+def summarize_text(text):
+    output = summarizer('summarize: ' + text)
+    return output[0]['summary_text']
+# Create a Gradio interface
 iface = gr.Interface(
+    fn=summarize_text,            # Function to run
+    inputs=gr.Textbox(label="Enter Greek Text", placeholder="Type or paste your text here..."),  # Input component
+    outputs=gr.Textbox(label="Summary", interactive=True),  # Output component
+    title="Greek Text Summarization",  # Title for the UI
+    description="This app uses a pre-trained Greek summarization model to generate a brief summary of your input text.",  # Description
+    allow_flagging="never"         # Optional: Disable flagging feature
 )
+# Launch the interface
 iface.launch()