songhieng committed on
Commit
26f89cf
·
verified ·
1 Parent(s): 9599706

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -25
app.py CHANGED
@@ -1,10 +1,19 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
 
4
- # 1. Model identifier
5
  MODEL_ID = "songhieng/khmer-mt5-summarization"
6
 
7
- # 2. Load tokenizer (you can choose fast or slow; fast is the default)
8
  @st.cache_resource
9
  def load_tokenizer_and_model(model_id):
10
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
@@ -13,33 +22,20 @@ def load_tokenizer_and_model(model_id):
13
 
14
  tokenizer, model = load_tokenizer_and_model(MODEL_ID)
15
 
16
- # 3. Streamlit page config
17
- st.set_page_config(
18
- page_title="Khmer Text Summarization",
19
- layout="wide",
20
- initial_sidebar_state="expanded"
21
- )
22
-
23
  # 4. App header
24
  st.title("📝 Khmer Text Summarization")
25
  st.write("Paste your Khmer text below and click **Summarize** to get a concise summary.")
26
 
27
  # 5. Sidebar summarization settings
28
  st.sidebar.header("Summarization Settings")
29
- max_length = st.sidebar.slider(
30
- "Maximum summary length", 50, 300, 150, step=10
31
- )
32
- min_length = st.sidebar.slider(
33
- "Minimum summary length", 10, 100, 30, step=5
34
- )
35
- num_beams = st.sidebar.slider(
36
- "Beam search width", 1, 10, 4, step=1
37
- )
38
 
39
  # 6. Text input
40
  user_input = st.text_area(
41
- "Enter Khmer text here…",
42
- height=300,
43
  placeholder="សូមវាយអត្ថបទខ្មែរនៅទីនេះ…"
44
  )
45
 
@@ -49,14 +45,14 @@ if st.button("Summarize"):
49
  st.warning("⚠️ Please enter some text to summarize.")
50
  else:
51
  with st.spinner("Generating summary…"):
52
- # Tokenize
53
  inputs = tokenizer(
54
  user_input,
55
  return_tensors="pt",
56
  truncation=True,
57
  padding="longest"
58
  )
59
- # Generate
60
  summary_ids = model.generate(
61
  **inputs,
62
  max_length=max_length,
@@ -65,11 +61,10 @@ if st.button("Summarize"):
65
  length_penalty=2.0,
66
  early_stopping=True
67
  )
68
- # Decode
69
  summary = tokenizer.decode(
70
- summary_ids[0],
71
  skip_special_tokens=True
72
  )
73
- # Display
74
  st.subheader("🔖 Summary:")
75
  st.write(summary)
 
1
  import streamlit as st
2
+
3
+ # 1. Streamlit page config MUST be the first Streamlit command
4
+ st.set_page_config(
5
+ page_title="Khmer Text Summarization",
6
+ page_icon="📝",
7
+ layout="wide",
8
+ initial_sidebar_state="expanded"
9
+ )
10
+
11
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
12
 
13
+ # 2. Model identifier
14
  MODEL_ID = "songhieng/khmer-mt5-summarization"
15
 
16
+ # 3. Load tokenizer & model, cached to avoid reloading every run
17
  @st.cache_resource
18
  def load_tokenizer_and_model(model_id):
19
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 
22
 
23
  tokenizer, model = load_tokenizer_and_model(MODEL_ID)
24
 
 
 
 
 
 
 
 
25
  # 4. App header
26
  st.title("📝 Khmer Text Summarization")
27
  st.write("Paste your Khmer text below and click **Summarize** to get a concise summary.")
28
 
29
  # 5. Sidebar summarization settings
30
  st.sidebar.header("Summarization Settings")
31
+ max_length = st.sidebar.slider("Maximum summary length", 50, 300, 150, step=10)
32
+ min_length = st.sidebar.slider("Minimum summary length", 10, 100, 30, step=5)
33
+ num_beams = st.sidebar.slider("Beam search width", 1, 10, 4, step=1)
 
 
 
 
 
 
34
 
35
  # 6. Text input
36
  user_input = st.text_area(
37
+ "Enter Khmer text here…",
38
+ height=300,
39
  placeholder="សូមវាយអត្ថបទខ្មែរនៅទីនេះ…"
40
  )
41
 
 
45
  st.warning("⚠️ Please enter some text to summarize.")
46
  else:
47
  with st.spinner("Generating summary…"):
48
+ # Tokenize the input text
49
  inputs = tokenizer(
50
  user_input,
51
  return_tensors="pt",
52
  truncation=True,
53
  padding="longest"
54
  )
55
+ # Generate the summary
56
  summary_ids = model.generate(
57
  **inputs,
58
  max_length=max_length,
 
61
  length_penalty=2.0,
62
  early_stopping=True
63
  )
64
+ # Decode and display
65
  summary = tokenizer.decode(
66
+ summary_ids[0],
67
  skip_special_tokens=True
68
  )
 
69
  st.subheader("🔖 Summary:")
70
  st.write(summary)