File size: 1,848 Bytes
02a4337 8ef53b5 02a4337 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Streamlit front end for Khmer text summarization: the user pastes text,
# tunes generation settings in the sidebar, and gets a summary from a
# pretrained seq2seq checkpoint.

# Configure the page first so it precedes every other Streamlit call,
# including the spinner the cached loader below may display.
st.set_page_config(page_title="Khmer Text Summarization", layout="wide")

model_identifier = "songhieng/khmer-mt5-summarization"


@st.cache_resource
def load_summarizer(identifier):
    """Load the tokenizer and seq2seq model for ``identifier``.

    Streamlit re-executes this script on every widget interaction, so the
    result is cached to avoid reloading the checkpoint on each rerun.

    Args:
        identifier: Model name or path understood by ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(identifier, use_fast=False)
    # ``use_fast`` is a tokenizer option only; it is deliberately not
    # forwarded to the model loader, where it is an unexpected keyword.
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(identifier)
    return loaded_tokenizer, loaded_model


tokenizer, model = load_summarizer(model_identifier)

# App title and description
st.title("Khmer Text Summarization")
st.write("Enter Khmer text below to generate a concise summary.")

# Text input
user_input = st.text_area("Input Text:", height=300)

# Summarization parameters
st.sidebar.header("Summarization Settings")
max_length = st.sidebar.slider(
    "Maximum Summary Length", min_value=50, max_value=300, value=150, step=10
)
min_length = st.sidebar.slider(
    "Minimum Summary Length", min_value=10, max_value=100, value=30, step=5
)
num_beams = st.sidebar.slider(
    "Number of Beams", min_value=1, max_value=10, value=4, step=1
)

# The slider ranges overlap (minimum goes up to 100, maximum down to 50), so
# the user can request a minimum above the maximum. Clamp it rather than
# handing generate() contradictory length constraints.
if min_length > max_length:
    st.sidebar.warning(
        "Minimum length is greater than maximum length; "
        "using the maximum length for both."
    )
    min_length = max_length

# Summarize button
if st.button("Summarize"):
    if user_input.strip():
        # Top-level UI boundary: report any failure to the user instead of
        # letting the traceback replace the page.
        try:
            # Tokenize input
            inputs = tokenizer.encode(
                user_input, return_tensors="pt", truncation=True
            )
            # Generate summary
            summary_ids = model.generate(
                inputs,
                max_length=max_length,
                min_length=min_length,
                num_beams=num_beams,
                length_penalty=2.0,
                early_stopping=True,
            )
            summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            # Display summary
            st.subheader("Summary:")
            st.write(summary)
        except Exception as e:
            st.error(f"An error occurred during summarization: {e}")
    else:
        st.warning("Please enter some text to summarize.")
|