tonyhui2234 committed on
Commit
424c696
·
verified ·
1 Parent(s): 833a88b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -111
app.py CHANGED
@@ -1,22 +1,24 @@
1
- import streamlit as st # For creating the web app interface
2
- import random # For generating random fortune numbers
3
- import pandas as pd # For handling CSV data
4
- import requests # For downloading images from URLs
5
- from io import BytesIO # For handling image bytes
6
- from PIL import Image # For image processing
7
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM # For NLP models
8
- import re # For regex operations
9
-
10
- # This script implements a Fortune Stick Enquiry app.
11
- # Users enter a question, which is validated and processed.
12
- # A random fortune is chosen from a CSV, and NLP models classify and generate custom answers.
13
 
14
  # Define maximum dimensions for the fortune image (in pixels)
15
  MAX_SIZE = (400, 400)
16
 
17
- # Initialize session state variables for button clicks, fortune details, etc.
18
  if "button_count_temp" not in st.session_state:
19
  st.session_state.button_count_temp = 0
 
 
 
 
 
 
20
  if "submitted_text" not in st.session_state:
21
  st.session_state.submitted_text = False
22
  if "fortune_number" not in st.session_state:
@@ -30,7 +32,6 @@ if "cfu_explain_text" not in st.session_state:
30
  if "stick_clicked" not in st.session_state:
31
  st.session_state.stick_clicked = False
32
 
33
- # Load fortune data from CSV file
34
  if "fortune_data" not in st.session_state:
35
  try:
36
  st.session_state.fortune_data = pd.read_csv("/home/user/app/resources/detail.csv")
@@ -38,69 +39,25 @@ if "fortune_data" not in st.session_state:
38
  st.error(f"Error loading CSV: {e}")
39
  st.session_state.fortune_data = None
40
 
41
- # ----------------------------------------------------
42
- # CACHED MODEL LOADING FUNCTIONS
43
- # ----------------------------------------------------
44
-
45
- @st.cache_resource
46
- def load_classifier_pipeline():
47
- """
48
- Load and cache the finetuned classifier pipeline.
49
- This model classifies the input question into one of the fortune categories.
50
- """
51
- return pipeline("text-classification", model="tonyhui2234/CustomModel_classifier_model_10")
52
-
53
- @st.cache_resource
54
- def load_tokenizer_and_model():
55
- """
56
- Load and cache the tokenizer and model for generating custom answers.
57
- Uses a finetuned sequence-to-sequence model from Hugging Face.
58
- """
59
- tokenizer = AutoTokenizer.from_pretrained("tonyhui2234/finetuned_model_text_gen")
60
- model = AutoModelForSeq2SeqLM.from_pretrained("tonyhui2234/finetuned_model_text_gen")
61
- return tokenizer, model
62
-
63
- @st.cache_resource
64
- def load_english_detection_pipeline():
65
- """
66
- Load and cache the English language detection pipeline.
67
- This ensures that the user's question is in English.
68
- """
69
- return pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")
70
-
71
- @st.cache_resource
72
- def load_question_detection_pipeline():
73
- """
74
- Load and cache the question vs. statement detection pipeline.
75
- This checks if the input text is a question.
76
- """
77
- return pipeline("text-classification", model="shahrukhx01/question-vs-statement-classifier")
78
-
79
- # ----------------------------------------------------
80
- # FUNCTION DEFINITIONS
81
- # ----------------------------------------------------
82
-
83
  def load_finetuned_classifier_model(question):
84
- """
85
- Classify the input question into a specific fortune category.
86
- Maps the classifier's output label to a human-readable format.
87
- """
88
  label_list = ["Geomancy", "Lost Property", "Personal Well-Being", "Future Prospect", "Traveling"]
89
- # Mapping dictionary to convert the default "LABEL_x" output.
90
  mapping = {f"LABEL_{i}": label for i, label in enumerate(label_list)}
91
-
92
- classifier_pipe = load_classifier_pipeline()
93
- prediction = classifier_pipe(question)[0]['label']
94
  predicted_label = mapping.get(prediction, prediction)
95
  print(predicted_label)
96
  return predicted_label
97
 
 
 
 
 
 
 
98
  def generate_answer(question, fortune):
99
- """
100
- Generate a custom answer using a finetuned sequence-to-sequence model.
101
- Combines the user's question with the fortune message to produce a response.
102
- """
103
- tokenizer, model = load_tokenizer_and_model()
104
  input_text = "Question: " + question + " Fortune: " + fortune
105
  inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
106
  outputs = model.generate(
@@ -115,38 +72,27 @@ def generate_answer(question, fortune):
115
  return answer
116
 
117
  def analysis(row_detail, classifiy, question):
118
- """
119
- Analyze the fortune detail based on the classifier's output.
120
- Extracts the specific fortune message using regex and generates an answer.
121
- """
122
  pattern = re.compile(re.escape(classifiy) + r":\s*(.*?)(?:\.|$)", re.IGNORECASE)
123
  match = pattern.search(row_detail)
124
  if match:
125
  result = match.group(1)
 
126
  return generate_answer(question, result)
127
  else:
128
  return "Heaven's secret cannot be revealed."
129
 
130
  def check_sentence_is_english_model(question):
131
- """
132
- Check if the input question is in English using a language detection model.
133
- """
134
- pipe_english = load_english_detection_pipeline()
135
  return pipe_english(question)[0]['label'] == 'en'
136
 
137
  def check_sentence_is_question_model(question):
138
- """
139
- Check if the input text is a question using a question vs. statement classifier.
140
- """
141
- pipe_question = load_question_detection_pipeline()
142
  return pipe_question(question)[0]['label'] == 'LABEL_1'
143
 
144
  def submit_text_callback():
145
- """
146
- Callback function executed when the user submits their question.
147
- Validates the input and retrieves a corresponding fortune based on a random number.
148
- """
149
  question = st.session_state.get("user_sentence", "")
 
150
  st.session_state.error_message = ""
151
 
152
  if not check_sentence_is_english_model(question):
@@ -165,12 +111,12 @@ def submit_text_callback():
165
  return
166
 
167
  st.session_state.submitted_text = True
168
- st.session_state.button_count_temp = 0 # Reset the counter after submission
169
 
170
- # Randomly generate a fortune stick number between 1 and 100
171
  st.session_state.fortune_number = random.randint(1, 100)
172
 
173
- # Retrieve fortune details from CSV data
174
  df = st.session_state.fortune_data
175
  row_detail = ''
176
  if df is not None:
@@ -196,9 +142,6 @@ def submit_text_callback():
196
  print(row_detail)
197
 
198
  def load_and_resize_image(path, max_size=MAX_SIZE):
199
- """
200
- Load an image from a local path and resize it to fit within MAX_SIZE.
201
- """
202
  try:
203
  img = Image.open(path)
204
  img.thumbnail(max_size, Image.Resampling.LANCZOS)
@@ -208,9 +151,6 @@ def load_and_resize_image(path, max_size=MAX_SIZE):
208
  return None
209
 
210
  def download_and_resize_image(url, max_size=MAX_SIZE):
211
- """
212
- Download an image from a URL and resize it to fit within MAX_SIZE.
213
- """
214
  try:
215
  response = requests.get(url)
216
  response.raise_for_status()
@@ -223,32 +163,24 @@ def download_and_resize_image(url, max_size=MAX_SIZE):
223
  return None
224
 
225
  def stick_enquiry_callback():
226
- """
227
- Callback function executed when the user clicks "Cfu Explain".
228
- Uses the classifier to analyze the fortune details and generate a custom answer.
229
- """
230
  question = st.session_state.get("user_sentence", "")
231
  if not st.session_state.fortune_row:
232
  st.error("Fortune data is not available. Please submit your question first.")
233
  return
234
  row_detail = st.session_state.fortune_row.get("Detail", "No detail available.")
 
235
  classifiy = load_finetuned_classifier_model(question)
 
236
  cfu_explain = analysis(row_detail, classifiy, question)
 
237
  st.session_state.cfu_explain_text = cfu_explain
238
  st.session_state.stick_clicked = True
239
 
240
- # ----------------------------------------------------
241
- # STREAMLIT APP LAYOUT
242
- # ----------------------------------------------------
243
-
244
- # Set page configuration and title
245
- st.set_page_config(page_title="Fortuen Stick Enquiry", layout="wide")
246
- st.title("Fortuen Stick Enquiry")
247
-
248
- # Define the main layout columns: Left for user input, Right for fortune display
249
  left_col, _, right_col = st.columns([3, 1, 5])
250
 
251
- # ---- Left Column: User Input and Interaction ----
252
  with left_col:
253
  left_top = st.container()
254
  left_bottom = st.container()
@@ -259,17 +191,16 @@ with left_col:
259
  st.error(st.session_state.error_message)
260
  if st.session_state.submitted_text:
261
  with left_bottom:
262
- # Add spacing
263
  for _ in range(5):
264
  st.write("")
265
  col1, col2, col3 = st.columns(3)
266
  with col2:
267
  st.button("Cfu Explain", key="stick_button", on_click=stick_enquiry_callback)
268
  if st.session_state.stick_clicked:
269
- # Display the generated explanation text
270
  st.text_area(' ', value=st.session_state.cfu_explain_text, height=300, disabled=True)
271
 
272
- # ---- Right Column: Fortune Display and Images ----
273
  with right_col:
274
  with st.container():
275
  col_left, col_center, col_right = st.columns([1, 2, 1])
@@ -308,3 +239,21 @@ with right_col:
308
 
309
  st.text_area("Description", value=description_text, height=150, disabled=True)
310
  st.text_area("Detail", value=detail_text, height=150, disabled=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import random
3
+ import pandas as pd
4
+ import requests
5
+ from io import BytesIO
6
+ from PIL import Image
7
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
8
+ import re
 
 
 
 
9
 
10
  # Define maximum dimensions for the fortune image (in pixels)
11
  MAX_SIZE = (400, 400)
12
 
13
+ # Initialize button click count in session state
14
  if "button_count_temp" not in st.session_state:
15
  st.session_state.button_count_temp = 0
16
+
17
+ # Set page configuration
18
+ st.set_page_config(page_title="Fortuen Stick Enquiry", layout="wide")
19
+ st.title("Fortuen Stick Enquiry")
20
+
21
+ # Initialize session state variables
22
  if "submitted_text" not in st.session_state:
23
  st.session_state.submitted_text = False
24
  if "fortune_number" not in st.session_state:
 
32
  if "stick_clicked" not in st.session_state:
33
  st.session_state.stick_clicked = False
34
 
 
35
  if "fortune_data" not in st.session_state:
36
  try:
37
  st.session_state.fortune_data = pd.read_csv("/home/user/app/resources/detail.csv")
 
39
  st.error(f"Error loading CSV: {e}")
40
  st.session_state.fortune_data = None
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
@st.cache_resource
def _load_classifier_pipeline():
    """Build the fortune-category classification pipeline once and cache it.

    Without @st.cache_resource, Streamlit re-instantiates (and potentially
    re-downloads) the model on every script rerun, which makes each
    classification call very slow.
    """
    return pipeline("text-classification", model="tonyhui2234/CustomModel_classifier_model_10")


def load_finetuned_classifier_model(question):
    """Classify *question* into one of the five fortune categories.

    Args:
        question: The user's question text.

    Returns:
        A human-readable category name (e.g. "Traveling"); if the model emits
        an unknown label it is returned unchanged.
    """
    label_list = ["Geomancy", "Lost Property", "Personal Well-Being", "Future Prospect", "Traveling"]
    # Create a mapping dictionary to convert the default "LABEL_x" output.
    mapping = {f"LABEL_{i}": label for i, label in enumerate(label_list)}

    # Reuse the cached pipeline instead of rebuilding it on every call.
    pipe = _load_classifier_pipeline()
    prediction = pipe(question)[0]['label']
    predicted_label = mapping.get(prediction, prediction)
    print(predicted_label)
    return predicted_label
52
 
53
@st.cache_resource
def load_model_and_tokenizer():
    """Load and cache the seq2seq answer-generation model with its tokenizer.

    Returns:
        A (tokenizer, model) pair; cached so the weights load only once.
    """
    checkpoint = "tonyhui2234/finetuned_model_text_gen"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )
58
+
59
  def generate_answer(question, fortune):
60
+ tokenizer, model = load_model_and_tokenizer()
 
 
 
 
61
  input_text = "Question: " + question + " Fortune: " + fortune
62
  inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
63
  outputs = model.generate(
 
72
  return answer
73
 
74
def analysis(row_detail, classifiy, question):
    """Pull the category-specific sentence out of *row_detail* and answer.

    Searches for "<category>: <sentence>" (case-insensitive, sentence ends at
    the first period or end of string). On a hit, the sentence is passed to
    generate_answer(); otherwise a stock refusal is returned.
    """
    # classifiy is the classifier's label, e.g. "Personal Well-Being".
    category_pattern = re.compile(
        re.escape(classifiy) + r":\s*(.*?)(?:\.|$)", re.IGNORECASE
    )
    found = category_pattern.search(row_detail)
    if not found:
        return "Heaven's secret cannot be revealed."
    # Generate a custom answer from the extracted fortune sentence.
    return generate_answer(question, found.group(1))
84
 
85
@st.cache_resource
def _load_english_detection_pipeline():
    """Build the language-detection pipeline once and cache it.

    Rebuilding the pipeline inside the check function made every validation
    pay the full model-load cost; caching restores one-time loading.
    """
    return pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")


def check_sentence_is_english_model(question):
    """Return True if the language-detection model labels *question* as English ('en')."""
    pipe_english = _load_english_detection_pipeline()
    return pipe_english(question)[0]['label'] == 'en'
88
 
89
@st.cache_resource
def _load_question_detection_pipeline():
    """Build the question-vs-statement pipeline once and cache it.

    Avoids re-instantiating the model on every validation call.
    """
    return pipeline("text-classification", model="shahrukhx01/question-vs-statement-classifier")


def check_sentence_is_question_model(question):
    """Return True if the classifier labels *question* as a question (LABEL_1)."""
    pipe_question = _load_question_detection_pipeline()
    return pipe_question(question)[0]['label'] == 'LABEL_1'
92
 
93
  def submit_text_callback():
 
 
 
 
94
  question = st.session_state.get("user_sentence", "")
95
+ # Clear any previous error message
96
  st.session_state.error_message = ""
97
 
98
  if not check_sentence_is_english_model(question):
 
111
  return
112
 
113
  st.session_state.submitted_text = True
114
+ st.session_state.button_count_temp = 0 # Reset the counter once submission is accepted
115
 
116
+ # Randomly generate a number from 1 to 100
117
  st.session_state.fortune_number = random.randint(1, 100)
118
 
119
+ # Look up the row in the CSV where CNumber matches the generated fortune number.
120
  df = st.session_state.fortune_data
121
  row_detail = ''
122
  if df is not None:
 
142
  print(row_detail)
143
 
144
  def load_and_resize_image(path, max_size=MAX_SIZE):
 
 
 
145
  try:
146
  img = Image.open(path)
147
  img.thumbnail(max_size, Image.Resampling.LANCZOS)
 
151
  return None
152
 
153
  def download_and_resize_image(url, max_size=MAX_SIZE):
 
 
 
154
  try:
155
  response = requests.get(url)
156
  response.raise_for_status()
 
163
  return None
164
 
165
def stick_enquiry_callback():
    """Handle the "Cfu Explain" click.

    Classifies the stored question, runs analysis() against the fortune's
    Detail text, and stashes the explanation in session state for display.
    """
    question = st.session_state.get("user_sentence", "")

    # A fortune row must already exist from a prior submission.
    if not st.session_state.fortune_row:
        st.error("Fortune data is not available. Please submit your question first.")
        return

    detail = st.session_state.fortune_row.get("Detail", "No detail available.")
    # Classify first, then build the explanation from the matching detail.
    category = load_finetuned_classifier_model(question)
    st.session_state.cfu_explain_text = analysis(detail, category, question)
    st.session_state.stick_clicked = True
179
 
180
+ # Main layout: Left (input) and Right (fortune display)
 
 
 
 
 
 
 
 
181
  left_col, _, right_col = st.columns([3, 1, 5])
182
 
183
+ # ---- Left Column ----
184
  with left_col:
185
  left_top = st.container()
186
  left_bottom = st.container()
 
191
  st.error(st.session_state.error_message)
192
  if st.session_state.submitted_text:
193
  with left_bottom:
 
194
  for _ in range(5):
195
  st.write("")
196
  col1, col2, col3 = st.columns(3)
197
  with col2:
198
  st.button("Cfu Explain", key="stick_button", on_click=stick_enquiry_callback)
199
  if st.session_state.stick_clicked:
200
+ # Display the explanation text saved from analysis()
201
  st.text_area(' ', value=st.session_state.cfu_explain_text, height=300, disabled=True)
202
 
203
+ # ---- Right Column ----
204
  with right_col:
205
  with st.container():
206
  col_left, col_center, col_right = st.columns([1, 2, 1])
 
239
 
240
  st.text_area("Description", value=description_text, height=150, disabled=True)
241
  st.text_area("Detail", value=detail_text, height=150, disabled=True)
242
+
243
+ why does the model load again every time this function is called?
244
# Define your inference function
def generate_answer(question, fortune):
    """Generate a custom answer from the question plus the fortune sentence.

    The tokenizer/model pair is loaded lazily on the first call and memoized
    on the function object — the original version re-downloaded and rebuilt
    both from the Hub on EVERY call, which is why "loading the function" was
    so slow.

    Args:
        question: The user's question text.
        fortune: The fortune sentence extracted for the classified category.

    Returns:
        The decoded model output with special tokens stripped.
    """
    if not hasattr(generate_answer, "_cached"):
        generate_answer._cached = (
            AutoTokenizer.from_pretrained("tonyhui2234/finetuned_model_text_gen"),
            AutoModelForSeq2SeqLM.from_pretrained("tonyhui2234/finetuned_model_text_gen"),
        )
    tokenizer, model = generate_answer._cached
    input_text = "Question: " + question + " Fortune: " + fortune
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
    # Beam search with repetition penalties to keep the answer coherent.
    outputs = model.generate(
        **inputs,
        max_length=256,
        num_beams=4,
        early_stopping=True,
        repetition_penalty=2.0,
        no_repeat_ngram_size=3
    )
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer