Update app.py
app.py CHANGED
@@ -5,86 +5,52 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("suriya7/bart-finetuned-text-summarization")
model = AutoModelForSeq2SeqLM.from_pretrained("suriya7/bart-finetuned-text-summarization")

-def
    try:
-        # Combine prompt with the text to guide the summarization
-        combined_input = f"Prompt: {prompt}\n\nText: {text}"
-
        # Tokenize input with truncation to fit model requirements
-        inputs = tokenizer([

        # Generate summary
-        summary_ids = model.generate(inputs['input_ids'],
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

-
-        user_stories = format_as_user_stories(summary)
-        return user_stories
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return ""

-def format_as_user_stories(summary):
-    user_stories = []
-    lines = summary.split('. ')
-
-    for line in lines:
-        line = line.strip()
-        if "as a" in line.lower() and "i want" in line.lower():
-            # Extract the parts of the user story
-            parts = line.split("so that")
-            if len(parts) == 2:
-                story = {
-                    "As a": parts[0].strip().capitalize(),
-                    "I want to": parts[1].strip().capitalize(),
-                    "So that": parts[1].strip().capitalize()
-                }
-                user_stories.append(story)
-
-    # Format user stories
-    formatted_stories = ""
-    for story in user_stories:
-        formatted_stories += f"**User Story:**\n\n" \
-                             f"**As a:** {story['As a']}\n" \
-                             f"**I want to:** {story['I want to']}\n" \
-                             f"**So that:** {story['So that']}\n\n"
-
-    return formatted_stories
-
# Initialize session state for input history if it doesn't exist
if 'input_history' not in st.session_state:
    st.session_state['input_history'] = []

# Streamlit interface
-st.title("

# User text inputs
bulk_text = st.text_area("Enter the bulk text (e.g., client calls, meeting transcripts)", height=300)
-prompt = st.text_input("Enter the prompt for the user stories", "Extract user stories from the following text.")

-if st.button("
-    if bulk_text
-        with st.spinner("Generating
-

-        if
-            # Save the input and
-            st.session_state['input_history'].append({"text": bulk_text, "
-            st.subheader("
-            st.write(
        else:
-            st.warning("No
    else:
-        st.warning("Please enter

-# Display the history of inputs and
if st.session_state['input_history']:
    st.subheader("History")
    for i, entry in enumerate(st.session_state['input_history']):
        st.write(f"**Input {i+1} (Text):** {entry['text']}")
-        st.write(f"**
-        st.write(f"**User Stories {i+1}:** {entry['user_stories']}")
        st.write("---")

# Instructions for using the app
-st.write("Enter your bulk text and
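Note: in the removed format_as_user_stories helper above, both the "I want to" and "So that" fields are filled from parts[1], so every extracted story repeats the same text for goal and benefit, and the "As a" field keeps the whole leading clause. A corrected parsing sketch (hypothetical, not part of this commit) that also splits on the "I want" marker:

# Hypothetical fix (not part of this commit): fill the three fields from
# distinct pieces of the sentence instead of reusing parts[1] twice.
import re

def split_user_story(line):
    # Case-insensitive split on the "I want (to)" and "so that" markers.
    parts = re.split(r"(?i)\bi want(?: to)?\b|\bso that\b", line)
    if len(parts) != 3:
        return None
    return {
        "As a": parts[0].strip().capitalize(),
        "I want to": parts[1].strip().capitalize(),
        "So that": parts[2].strip().capitalize(),
    }

# split_user_story("As a manager I want to see a summary so that I can skim calls")
# -> {'As a': 'As a manager', 'I want to': 'See a summary', 'So that': 'I can skim calls'}

In the updated app.py below (added lines marked with +), this helper is dropped entirely and the app only produces a plain summary.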
tokenizer = AutoTokenizer.from_pretrained("suriya7/bart-finetuned-text-summarization")
model = AutoModelForSeq2SeqLM.from_pretrained("suriya7/bart-finetuned-text-summarization")

+def summarize_text(text):
    try:
        # Tokenize input with truncation to fit model requirements
+        inputs = tokenizer([text], max_length=1024, return_tensors='pt', truncation=True)

        # Generate summary
+        summary_ids = model.generate(inputs['input_ids'], max_length=150, num_beams=4, early_stopping=True)
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

+        return summary
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return ""

# Initialize session state for input history if it doesn't exist
if 'input_history' not in st.session_state:
    st.session_state['input_history'] = []

# Streamlit interface
+st.title("Text Summarizer")

# User text inputs
bulk_text = st.text_area("Enter the bulk text (e.g., client calls, meeting transcripts)", height=300)

+if st.button("Summarize Text"):
+    if bulk_text:
+        with st.spinner("Generating summary..."):
+            summary = summarize_text(bulk_text)

+        if summary:
+            # Save the input and summary to the session state history
+            st.session_state['input_history'].append({"text": bulk_text, "summary": summary})
+            st.subheader("Summary:")
+            st.write(summary)
        else:
+            st.warning("No summary was generated. Please check the input and try again.")
    else:
+        st.warning("Please enter the bulk text.")

+# Display the history of inputs and summaries
if st.session_state['input_history']:
    st.subheader("History")
    for i, entry in enumerate(st.session_state['input_history']):
        st.write(f"**Input {i+1} (Text):** {entry['text']}")
+        st.write(f"**Summary {i+1}:** {entry['summary']}")
        st.write("---")

# Instructions for using the app
+st.write("Enter your bulk text and click 'Summarize Text' to get a summary of the text.")
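The hunk starts at line 5, so the first lines of app.py are not shown. Judging from the hunk header and the st.* calls, the file presumably opens with roughly the following imports (a sketch of the unshown lines, not part of the diff); with them in place the Space can also be run locally with streamlit run app.py.

# Presumed top of app.py (outside this hunk); the exact original lines are not shown in the diff.
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM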