Spaces:

tonyhui2234
/

Group22_Project

Paused

App Files Files Community

tonyhui2234 committed on Mar 23

Commit

833a88b

verified ·

1 Parent(s): b1e9f49

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -38

app.py CHANGED Viewed

@@ -1,24 +1,22 @@
-import streamlit as st
-import random
-import pandas as pd
-import requests
-from io import BytesIO
-from PIL import Image
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
-import re
 # Define maximum dimensions for the fortune image (in pixels)
 MAX_SIZE = (400, 400)
-# Initialize button click count in session state
 if "button_count_temp" not in st.session_state:
     st.session_state.button_count_temp = 0
-# Set page configuration
-st.set_page_config(page_title="Fortuen Stick Enquiry", layout="wide")
-st.title("Fortuen Stick Enquiry")
-# Initialize session state variables
 if "submitted_text" not in st.session_state:
     st.session_state.submitted_text = False
 if "fortune_number" not in st.session_state:
@@ -32,6 +30,7 @@ if "cfu_explain_text" not in st.session_state:
 if "stick_clicked" not in st.session_state:
     st.session_state.stick_clicked = False
 if "fortune_data" not in st.session_state:
     try:
         st.session_state.fortune_data = pd.read_csv("/home/user/app/resources/detail.csv")
@@ -39,21 +38,69 @@ if "fortune_data" not in st.session_state:
         st.error(f"Error loading CSV: {e}")
         st.session_state.fortune_data = None
 def load_finetuned_classifier_model(question):
     label_list = ["Geomancy", "Lost Property", "Personal Well-Being", "Future Prospect", "Traveling"]
-    # Create a mapping dictionary to convert the default "LABEL_x" output.
     mapping = {f"LABEL_{i}": label for i, label in enumerate(label_list)}
-    pipe = pipeline("text-classification", model="tonyhui2234/CustomModel_classifier_model_10")
-    prediction = pipe(question)[0]['label']
     predicted_label = mapping.get(prediction, prediction)
     print(predicted_label)
     return predicted_label
-# Define your inference function
 def generate_answer(question, fortune):
-    tokenizer = AutoTokenizer.from_pretrained("tonyhui2234/finetuned_model_text_gen")
-    model = AutoModelForSeq2SeqLM.from_pretrained("tonyhui2234/finetuned_model_text_gen")
     input_text = "Question: " + question + " Fortune: " + fortune
     inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
     outputs = model.generate(
@@ -68,27 +115,38 @@ def generate_answer(question, fortune):
     return answer
 def analysis(row_detail, classifiy, question):
-    # Use the classifier's output (e.g. "Personal Well-Being") in the regex.
     pattern = re.compile(re.escape(classifiy) + r":\s*(.*?)(?:\.|$)", re.IGNORECASE)
     match = pattern.search(row_detail)
     if match:
         result = match.group(1)
-        # If you want to generate a custom answer, you can call generate_answer()
         return generate_answer(question, result)
     else:
         return "Heaven's secret cannot be revealed."
 def check_sentence_is_english_model(question):
-    pipe_english = pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")
     return pipe_english(question)[0]['label'] == 'en'
 def check_sentence_is_question_model(question):
-    pipe_question = pipeline("text-classification", model="shahrukhx01/question-vs-statement-classifier")
     return pipe_question(question)[0]['label'] == 'LABEL_1'
 def submit_text_callback():
     question = st.session_state.get("user_sentence", "")
-    # Clear any previous error message
     st.session_state.error_message = ""
     if not check_sentence_is_english_model(question):
@@ -107,12 +165,12 @@ def submit_text_callback():
         return
     st.session_state.submitted_text = True
-    st.session_state.button_count_temp = 0  # Reset the counter once submission is accepted
-    # Randomly generate a number from 1 to 100
     st.session_state.fortune_number = random.randint(1, 100)
-    # Look up the row in the CSV where CNumber matches the generated fortune number.
     df = st.session_state.fortune_data
     row_detail = ''
     if df is not None:
@@ -138,6 +196,9 @@ def submit_text_callback():
     print(row_detail)
 def load_and_resize_image(path, max_size=MAX_SIZE):
     try:
         img = Image.open(path)
         img.thumbnail(max_size, Image.Resampling.LANCZOS)
@@ -147,6 +208,9 @@ def load_and_resize_image(path, max_size=MAX_SIZE):
         return None
 def download_and_resize_image(url, max_size=MAX_SIZE):
     try:
         response = requests.get(url)
         response.raise_for_status()
@@ -159,24 +223,32 @@ def download_and_resize_image(url, max_size=MAX_SIZE):
         return None
 def stick_enquiry_callback():
-    # Retrieve the user's question and the fortune detail
     question = st.session_state.get("user_sentence", "")
     if not st.session_state.fortune_row:
         st.error("Fortune data is not available. Please submit your question first.")
         return
     row_detail = st.session_state.fortune_row.get("Detail", "No detail available.")
-    # Run the classifier model after the image has loaded
     classifiy = load_finetuned_classifier_model(question)
-    # Generate the explanation using the analysis function
     cfu_explain = analysis(row_detail, classifiy, question)
-    # Save the returned value in session state for later display
     st.session_state.cfu_explain_text = cfu_explain
     st.session_state.stick_clicked = True
-# Main layout: Left (input) and Right (fortune display)
 left_col, _, right_col = st.columns([3, 1, 5])
-# ---- Left Column ----
 with left_col:
     left_top = st.container()
     left_bottom = st.container()
@@ -187,16 +259,17 @@ with left_col:
             st.error(st.session_state.error_message)
     if st.session_state.submitted_text:
         with left_bottom:
             for _ in range(5):
                 st.write("")
             col1, col2, col3 = st.columns(3)
             with col2:
                 st.button("Cfu Explain", key="stick_button", on_click=stick_enquiry_callback)
             if st.session_state.stick_clicked:
-                # Display the explanation text saved from analysis()
                 st.text_area(' ', value=st.session_state.cfu_explain_text, height=300, disabled=True)
-# ---- Right Column ----
 with right_col:
     with st.container():
         col_left, col_center, col_right = st.columns([1, 2, 1])

+import streamlit as st          # For creating the web app interface
+import random                   # For generating random fortune numbers
+import pandas as pd             # For handling CSV data
+import requests                 # For downloading images from URLs
+from io import BytesIO          # For handling image bytes
+from PIL import Image           # For image processing
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM  # For NLP models
+import re                       # For regex operations
+# This script implements a Fortune Stick Enquiry app.
+# Users enter a question, which is validated and processed.
+# A random fortune is chosen from a CSV, and NLP models classify and generate custom answers.
 # Define maximum dimensions for the fortune image (in pixels)
 MAX_SIZE = (400, 400)
+# Initialize session state variables for button clicks, fortune details, etc.
 if "button_count_temp" not in st.session_state:
     st.session_state.button_count_temp = 0
 if "submitted_text" not in st.session_state:
     st.session_state.submitted_text = False
 if "fortune_number" not in st.session_state:
 if "stick_clicked" not in st.session_state:
     st.session_state.stick_clicked = False
+# Load fortune data from CSV file
 if "fortune_data" not in st.session_state:
     try:
         st.session_state.fortune_data = pd.read_csv("/home/user/app/resources/detail.csv")
         st.error(f"Error loading CSV: {e}")
         st.session_state.fortune_data = None
+# ----------------------------------------------------
+# CACHED MODEL LOADING FUNCTIONS
+# ----------------------------------------------------
+@st.cache_resource
+def load_classifier_pipeline():
+    """
+    Load and cache the finetuned classifier pipeline.
+    This model classifies the input question into one of the fortune categories.
+    """
+    return pipeline("text-classification", model="tonyhui2234/CustomModel_classifier_model_10")
+@st.cache_resource
+def load_tokenizer_and_model():
+    """
+    Load and cache the tokenizer and model for generating custom answers.
+    Uses a finetuned sequence-to-sequence model from Hugging Face.
+    """
+    tokenizer = AutoTokenizer.from_pretrained("tonyhui2234/finetuned_model_text_gen")
+    model = AutoModelForSeq2SeqLM.from_pretrained("tonyhui2234/finetuned_model_text_gen")
+    return tokenizer, model
+@st.cache_resource
+def load_english_detection_pipeline():
+    """
+    Load and cache the English language detection pipeline.
+    This ensures that the user's question is in English.
+    """
+    return pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")
+@st.cache_resource
+def load_question_detection_pipeline():
+    """
+    Load and cache the question vs. statement detection pipeline.
+    This checks if the input text is a question.
+    """
+    return pipeline("text-classification", model="shahrukhx01/question-vs-statement-classifier")
+# ----------------------------------------------------
+# FUNCTION DEFINITIONS
+# ----------------------------------------------------
 def load_finetuned_classifier_model(question):
+    """
+    Classify the input question into a specific fortune category.
+    Maps the classifier's output label to a human-readable format.
+    """
     label_list = ["Geomancy", "Lost Property", "Personal Well-Being", "Future Prospect", "Traveling"]
+    # Mapping dictionary to convert the default "LABEL_x" output.
     mapping = {f"LABEL_{i}": label for i, label in enumerate(label_list)}
+    classifier_pipe = load_classifier_pipeline()
+    prediction = classifier_pipe(question)[0]['label']
     predicted_label = mapping.get(prediction, prediction)
     print(predicted_label)
     return predicted_label
 def generate_answer(question, fortune):
+    """
+    Generate a custom answer using a finetuned sequence-to-sequence model.
+    Combines the user's question with the fortune message to produce a response.
+    """
+    tokenizer, model = load_tokenizer_and_model()
     input_text = "Question: " + question + " Fortune: " + fortune
     inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
     outputs = model.generate(
     return answer
 def analysis(row_detail, classifiy, question):
+    """
+    Analyze the fortune detail based on the classifier's output.
+    Extracts the specific fortune message using regex and generates an answer.
+    """
     pattern = re.compile(re.escape(classifiy) + r":\s*(.*?)(?:\.|$)", re.IGNORECASE)
     match = pattern.search(row_detail)
     if match:
         result = match.group(1)
         return generate_answer(question, result)
     else:
         return "Heaven's secret cannot be revealed."
 def check_sentence_is_english_model(question):
+    """
+    Check if the input question is in English using a language detection model.
+    """
+    pipe_english = load_english_detection_pipeline()
     return pipe_english(question)[0]['label'] == 'en'
 def check_sentence_is_question_model(question):
+    """
+    Check if the input text is a question using a question vs. statement classifier.
+    """
+    pipe_question = load_question_detection_pipeline()
     return pipe_question(question)[0]['label'] == 'LABEL_1'
 def submit_text_callback():
+    """
+    Callback function executed when the user submits their question.
+    Validates the input and retrieves a corresponding fortune based on a random number.
+    """
     question = st.session_state.get("user_sentence", "")
     st.session_state.error_message = ""
     if not check_sentence_is_english_model(question):
         return
     st.session_state.submitted_text = True
+    st.session_state.button_count_temp = 0  # Reset the counter after submission
+    # Randomly generate a fortune stick number between 1 and 100
     st.session_state.fortune_number = random.randint(1, 100)
+    # Retrieve fortune details from CSV data
     df = st.session_state.fortune_data
     row_detail = ''
     if df is not None:
     print(row_detail)
 def load_and_resize_image(path, max_size=MAX_SIZE):
+    """
+    Load an image from a local path and resize it to fit within MAX_SIZE.
+    """
     try:
         img = Image.open(path)
         img.thumbnail(max_size, Image.Resampling.LANCZOS)
         return None
 def download_and_resize_image(url, max_size=MAX_SIZE):
+    """
+    Download an image from a URL and resize it to fit within MAX_SIZE.
+    """
     try:
         response = requests.get(url)
         response.raise_for_status()
         return None
 def stick_enquiry_callback():
+    """
+    Callback function executed when the user clicks "Cfu Explain".
+    Uses the classifier to analyze the fortune details and generate a custom answer.
+    """
     question = st.session_state.get("user_sentence", "")
     if not st.session_state.fortune_row:
         st.error("Fortune data is not available. Please submit your question first.")
         return
     row_detail = st.session_state.fortune_row.get("Detail", "No detail available.")
     classifiy = load_finetuned_classifier_model(question)
     cfu_explain = analysis(row_detail, classifiy, question)
     st.session_state.cfu_explain_text = cfu_explain
     st.session_state.stick_clicked = True
+# ----------------------------------------------------
+# STREAMLIT APP LAYOUT
+# ----------------------------------------------------
+# Set page configuration and title
+st.set_page_config(page_title="Fortuen Stick Enquiry", layout="wide")
+st.title("Fortuen Stick Enquiry")
+# Define the main layout columns: Left for user input, Right for fortune display
 left_col, _, right_col = st.columns([3, 1, 5])
+# ---- Left Column: User Input and Interaction ----
 with left_col:
     left_top = st.container()
     left_bottom = st.container()
             st.error(st.session_state.error_message)
     if st.session_state.submitted_text:
         with left_bottom:
+            # Add spacing
             for _ in range(5):
                 st.write("")
             col1, col2, col3 = st.columns(3)
             with col2:
                 st.button("Cfu Explain", key="stick_button", on_click=stick_enquiry_callback)
             if st.session_state.stick_clicked:
+                # Display the generated explanation text
                 st.text_area(' ', value=st.session_state.cfu_explain_text, height=300, disabled=True)
+# ---- Right Column: Fortune Display and Images ----
 with right_col:
     with st.container():
         col_left, col_center, col_right = st.columns([1, 2, 1])