Spaces:

songhieng
/

khmer-mt5-summarization-interface

Sleeping

App Files Files Community

khmer-mt5-summarization-interface / app.py

songhieng

Update app.py

9599706 verified about 1 month ago

raw

history blame

2.25 kB

	import streamlit as st
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

	# 1. Model identifier
	MODEL_ID = "songhieng/khmer-mt5-summarization"

	# 2. Load tokenizer and model together
	@st.cache_resource
	def load_tokenizer_and_model(model_id):
	    """Load the tokenizer and seq2seq model for ``model_id``.

	    Wrapped in ``st.cache_resource`` so the loaded objects are reused by
	    later calls with the same ``model_id`` instead of being rebuilt on
	    every script rerun.

	    Args:
	        model_id: Identifier passed unchanged to both ``from_pretrained``
	            calls.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    # use_fast=True asks for the fast tokenizer implementation; switch it
	    # to False to get the slow one instead.
	    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
	    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
	    return tokenizer, model

	# 3. Streamlit page config
	# Kept ahead of the model load: Streamlit documents set_page_config as
	# the first Streamlit command on a page, and calling the cached loader
	# first can emit a spinner element before the page is configured.
	st.set_page_config(
	    page_title="Khmer Text Summarization",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)

	# Load the tokenizer and model (served from the resource cache after the
	# first call); both are used by the Summarize handler further down.
	tokenizer, model = load_tokenizer_and_model(MODEL_ID)

	# 4. App header: page title followed by a one-line usage hint
	st.title("📝 Khmer Text Summarization")
	st.write("Paste your Khmer text below and click Summarize to get a concise summary.")

	# 5. Sidebar summarization settings
	# Every widget created inside this context is placed in the sidebar; each
	# slider hands back its currently selected value.
	with st.sidebar:
	    st.header("Summarization Settings")
	    max_length = st.slider(
	        "Maximum summary length",
	        min_value=50,
	        max_value=300,
	        value=150,
	        step=10,
	    )
	    min_length = st.slider(
	        "Minimum summary length",
	        min_value=10,
	        max_value=100,
	        value=30,
	        step=5,
	    )
	    num_beams = st.slider(
	        "Beam search width",
	        min_value=1,
	        max_value=10,
	        value=4,
	        step=1,
	    )

	# 6. Text input: multi-line box holding the text to summarize
	user_input = st.text_area(
	    label="Enter Khmer text here…",
	    height=300,
	    placeholder="សូមវាយអត្ថបទខ្មែរនៅទីនេះ…",
	)

	# 7. Summarize button
	# Runs the tokenize -> generate -> decode pipeline when the button is
	# pressed and renders the result below the input box.
	if st.button("Summarize"):
	    if not user_input.strip():
	        # Empty or whitespace-only input: nothing to summarize.
	        st.warning("⚠️ Please enter some text to summarize.")
	    else:
	        # The sidebar ranges overlap (minimum goes up to 100 while maximum
	        # goes down to 50), so the chosen minimum can exceed the maximum.
	        # Clamp it so generate() never receives an infeasible constraint.
	        effective_min_length = min(min_length, max_length)

	        with st.spinner("Generating summary…"):
	            # Tokenize the text exactly as typed into PyTorch tensors.
	            # NOTE(review): truncation=True has no explicit max_length, so
	            # it relies on whatever limit the tokenizer defines — confirm
	            # this checkpoint sets one, otherwise pass max_length here.
	            inputs = tokenizer(
	                user_input,
	                return_tensors="pt",
	                truncation=True,
	                padding="longest",
	            )
	            # Generate with the sidebar settings.
	            summary_ids = model.generate(
	                **inputs,
	                max_length=max_length,
	                min_length=effective_min_length,
	                num_beams=num_beams,
	                length_penalty=2.0,
	                early_stopping=True,
	            )
	            # Decode the first returned sequence, dropping special tokens.
	            summary = tokenizer.decode(
	                summary_ids[0],
	                skip_special_tokens=True,
	            )

	        # Display
	        st.subheader("🔖 Summary:")
	        st.write(summary)