Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,86 +5,52 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
5 |
tokenizer = AutoTokenizer.from_pretrained("suriya7/bart-finetuned-text-summarization")
|
6 |
model = AutoModelForSeq2SeqLM.from_pretrained("suriya7/bart-finetuned-text-summarization")
|
7 |
|
8 |
-
def
|
9 |
try:
|
10 |
-
# Combine prompt with the text to guide the summarization
|
11 |
-
combined_input = f"Prompt: {prompt}\n\nText: {text}"
|
12 |
-
|
13 |
# Tokenize input with truncation to fit model requirements
|
14 |
-
inputs = tokenizer([
|
15 |
|
16 |
# Generate summary
|
17 |
-
summary_ids = model.generate(inputs['input_ids'],
|
18 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
19 |
|
20 |
-
|
21 |
-
user_stories = format_as_user_stories(summary)
|
22 |
-
return user_stories
|
23 |
except Exception as e:
|
24 |
st.error(f"An error occurred: {e}")
|
25 |
return ""
|
26 |
|
27 |
-
def format_as_user_stories(summary):
    """Extract "As a … I want … so that …" sentences from *summary* and
    render each one as a Markdown-formatted user story.

    Sentences are taken to be '. '-separated. A sentence is kept only if it
    contains both "as a" and "i want" (case-insensitive) and exactly one
    "so that" clause; everything else is skipped. Returns "" when no story
    is found.

    Bug fixed vs. the previous revision: the "I want to" and "So that"
    fields both used ``parts[1]`` (the benefit clause), so the want-clause
    was lost and the role field carried the entire pre-"so that" text.
    The head is now split on the "i want" marker that the filter above
    already requires.
    """
    user_stories = []
    for line in summary.split('. '):
        line = line.strip()
        lowered = line.lower()
        if "as a" not in lowered or "i want" not in lowered:
            continue

        # Split off the benefit clause; require exactly one "so that",
        # matching the original len(parts) == 2 acceptance rule.
        parts = line.split("so that")
        if len(parts) != 2:
            continue
        head, benefit = parts

        # Locate the desire clause inside the head, case-insensitively,
        # mirroring the "i want" membership test above.
        idx = head.lower().find("i want")
        if idx == -1:
            continue  # "i want" appeared only inside the benefit clause

        role = head[:idx].strip()
        if role.lower().startswith("as a"):
            # Drop the redundant "As a" prefix (and separator punctuation)
            # so the rendered "**As a:**" label isn't doubled.
            role = role[4:].strip(" ,")

        want = head[idx + len("i want"):].strip()
        if want.lower().startswith("to "):
            # Avoid "**I want to:** to ..." in the rendered output.
            want = want[3:]

        user_stories.append({
            "As a": role.capitalize(),
            "I want to": want.capitalize(),
            "So that": benefit.strip().capitalize(),
        })

    # Render each story as a Markdown block.
    formatted_stories = ""
    for story in user_stories:
        formatted_stories += (
            f"**User Story:**\n\n"
            f"**As a:** {story['As a']}\n"
            f"**I want to:** {story['I want to']}\n"
            f"**So that:** {story['So that']}\n\n"
        )
    return formatted_stories
|
53 |
-
|
54 |
# Initialize session state for input history if it doesn't exist
|
55 |
if 'input_history' not in st.session_state:
|
56 |
st.session_state['input_history'] = []
|
57 |
|
58 |
# Streamlit interface
|
59 |
-
st.title("
|
60 |
|
61 |
# User text inputs
|
62 |
bulk_text = st.text_area("Enter the bulk text (e.g., client calls, meeting transcripts)", height=300)
|
63 |
-
prompt = st.text_input("Enter the prompt for the user stories", "Extract user stories from the following text.")
|
64 |
|
65 |
-
if st.button("
|
66 |
-
if bulk_text
|
67 |
-
with st.spinner("Generating
|
68 |
-
|
69 |
|
70 |
-
if
|
71 |
-
# Save the input and
|
72 |
-
st.session_state['input_history'].append({"text": bulk_text, "
|
73 |
-
st.subheader("
|
74 |
-
st.write(
|
75 |
else:
|
76 |
-
st.warning("No
|
77 |
else:
|
78 |
-
st.warning("Please enter
|
79 |
|
80 |
-
# Display the history of inputs and
|
81 |
if st.session_state['input_history']:
|
82 |
st.subheader("History")
|
83 |
for i, entry in enumerate(st.session_state['input_history']):
|
84 |
st.write(f"**Input {i+1} (Text):** {entry['text']}")
|
85 |
-
st.write(f"**
|
86 |
-
st.write(f"**User Stories {i+1}:** {entry['user_stories']}")
|
87 |
st.write("---")
|
88 |
|
89 |
# Instructions for using the app
|
90 |
-
st.write("Enter your bulk text and
|
|
|
5 |
# Load the fine-tuned BART summarization tokenizer and model at module level.
# NOTE(review): Streamlit re-executes the whole script on every interaction;
# consider wrapping these loads in @st.cache_resource so the weights are not
# reloaded on each rerun — confirm against the deployed Streamlit version.
tokenizer = AutoTokenizer.from_pretrained("suriya7/bart-finetuned-text-summarization")
model = AutoModelForSeq2SeqLM.from_pretrained("suriya7/bart-finetuned-text-summarization")
+
def summarize_text(text):
    """Summarize *text* with the fine-tuned BART model.

    Tokenizes with truncation to the model's 1024-token limit, generates
    with beam search (4 beams, max 150 tokens), and returns the decoded
    summary string. On any failure the error is shown to the user via
    st.error and "" is returned so the caller can branch on emptiness.
    """
    try:
        # Tokenize input with truncation to fit model requirements.
        encoded = tokenizer([text], max_length=1024, return_tensors='pt', truncation=True)

        # Generate and decode the summary in one pass.
        generated = model.generate(encoded['input_ids'], max_length=150, num_beams=4, early_stopping=True)
        return tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return ""
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
# Initialize session state for input history if it doesn't exist
# (st.session_state persists across script reruns within a browser session,
# so this guard only runs the assignment once per session).
if 'input_history' not in st.session_state:
    st.session_state['input_history'] = []
|
25 |
|
26 |
# Streamlit interface
st.title("Text Summarizer")

# User text inputs
bulk_text = st.text_area("Enter the bulk text (e.g., client calls, meeting transcripts)", height=300)

if st.button("Summarize Text"):
    # Guard-clause style: reject empty input first, then empty results.
    if not bulk_text:
        st.warning("Please enter the bulk text.")
    else:
        with st.spinner("Generating summary..."):
            summary = summarize_text(bulk_text)

        if not summary:
            st.warning("No summary was generated. Please check the input and try again.")
        else:
            # Save the input and summary to the session state history
            st.session_state['input_history'].append({"text": bulk_text, "summary": summary})
            st.subheader("Summary:")
            st.write(summary)

# Display the history of inputs and summaries
if st.session_state['input_history']:
    st.subheader("History")
    for i, entry in enumerate(st.session_state['input_history']):
        st.write(f"**Input {i+1} (Text):** {entry['text']}")
        st.write(f"**Summary {i+1}:** {entry['summary']}")
        st.write("---")

# Instructions for using the app
st.write("Enter your bulk text and click 'Summarize Text' to get a summary of the text.")
|