import gradio as gr import json from sentence_transformers import SentenceTransformer from transformers import pipeline from sklearn.metrics.pairwise import cosine_similarity import numpy as np import os # === Custom PUP-themed CSS === PUP_Themed_css = """ html, body, .gradio-container, .gr-app { height: 100% !important; margin: 0 !important; padding: 0 !important; background: linear-gradient(to bottom right, #800000, #ff0000, #ffeb3b, #ffa500) !important; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important; color: #1b4332 !important; } """ # === Load Models and Data === embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2') llm = pipeline("text2text-generation", model="google/flan-t5-small") with open("dataset.json", "r") as f: dataset = json.load(f) questions = [item["question"] for item in dataset] answers = [item["answer"] for item in dataset] question_embeddings = embedding_model.encode(questions, convert_to_tensor=True) chat_history = [] feedback_data = [] feedback_questions = [] feedback_answers = [] feedback_embeddings = None if os.path.exists("feedback.json") and os.path.getsize("feedback.json") > 0: with open("feedback.json", "r") as f: try: feedback_data = json.load(f) feedback_questions = [item["question"] for item in feedback_data] feedback_answers = [item["response"] for item in feedback_data] if feedback_questions: feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True) except json.JSONDecodeError: feedback_data = [] # === Chatbot Response Function === def chatbot_response(query, chat_history): query_embedding = embedding_model.encode([query], convert_to_tensor=True) # === Feedback Matching === if feedback_embeddings is not None: feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0] best_idx = int(np.argmax(feedback_scores)) best_score = feedback_scores[best_idx] matched_feedback = feedback_data[best_idx] base_threshold = 0.8 upvotes = matched_feedback.get("upvotes", 0) downvotes = matched_feedback.get("downvotes", 0) adjusted_threshold = base_threshold - (0.01 * upvotes) + (0.01 * downvotes) dynamic_threshold = min(max(adjusted_threshold, 0.4), 1.0) if best_score >= dynamic_threshold: response = matched_feedback["response"] chat_history.append((query, response)) return "", chat_history, gr.update(visible=True) # === Main Handbook Matching === similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0] best_idx = int(np.argmax(similarity_scores)) best_score = similarity_scores[best_idx] matched_q = questions[best_idx] matched_a = answers[best_idx] if best_score < 0.4: response = "Sorry, I couldn't find a relevant answer." chat_history.append((query, response)) return "", chat_history, gr.update(visible=True) prompt = ( f"The following is an official university handbook statement:\n" f"\"{matched_a}\"\n\n" f"Please explain this to a student in a short, natural, and easy-to-understand way. " f"Use simple words, and do not add new information." ) llm_response = llm(prompt, max_length=200, do_sample=True, temperature=0.7, top_p=0.9)[0]["generated_text"].strip() if not llm_response: llm_response = "I'm sorry, I couldn't simplify that at the moment." a_embedding = embedding_model.encode([matched_a], convert_to_tensor=True) llm_embedding = embedding_model.encode([llm_response], convert_to_tensor=True) explanation_similarity = cosine_similarity(a_embedding.cpu().numpy(), llm_embedding.cpu().numpy())[0][0] if explanation_similarity >= 0.95: final_response = f"According to the university handbook, {matched_a}" else: final_response = f"According to the university handbook, {matched_a} In simpler terms, {llm_response}" chat_history.append((query, final_response)) return "", chat_history, gr.update(visible=True) # === Feedback Save & Upvote/Downvote Tracking === def record_feedback(feedback, chat_history): global feedback_embeddings if chat_history: last_query, last_response = chat_history[-1] matched = False for item in feedback_data: existing_embedding = embedding_model.encode([item["question"]], convert_to_tensor=True) new_embedding = embedding_model.encode([last_query], convert_to_tensor=True) similarity = cosine_similarity(existing_embedding.cpu().numpy(), new_embedding.cpu().numpy())[0][0] if similarity >= 0.8 and item["response"] == last_response: matched = True votes = {"positive": "upvotes", "negative": "downvotes"} item[votes[feedback]] = item.get(votes[feedback], 0) + 1 break if not matched: entry = { "question": last_query, "response": last_response, "feedback": feedback, "upvotes": 1 if feedback == "positive" else 0, "downvotes": 1 if feedback == "negative" else 0 } feedback_data.append(entry) with open("feedback.json", "w") as f: json.dump(feedback_data, f, indent=4) # Update feedback embeddings feedback_questions = [item["question"] for item in feedback_data] if feedback_questions: feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True) return gr.update(visible=False) # === Gradio UI === with gr.Blocks(css=PUP_Themed_css, title="University Handbook AI Chatbot") as demo: gr.Markdown( "