# JirasakJo — Update app.py (commit 9314e12, verified; 25.9 kB)
# NOTE: the lines above were Hugging Face file-viewer chrome captured along
# with the source; kept here as a comment so the module remains importable.
import streamlit as st
import json
import os
import requests
import json
import base64
from datetime import datetime, timedelta
import subprocess
from huggingface_hub import HfApi
from pathlib import Path
from calendar_rag import (
create_default_config,
AcademicCalendarRAG,
PipelineConfig,
ModelConfig,
RetrieverConfig,
CacheConfig,
ProcessingConfig,
LocalizationConfig
)
# Custom CSS for enhanced styling
def load_custom_css():
    """Inject the app-wide CSS theme into the Streamlit page.

    Styles the page body, headers, chat bubbles and the online/offline
    status badges. Call once near the top of main().
    """
    st.markdown("""
        <style>
        /* General body styling */
        body {
            font-family: "Arial", sans-serif !important;
            color: #000000 !important;
            background-color: white !important;
            line-height: 1.7 !important;
        }
        /* Main container styling */
        .main {
            padding: 2rem;
            color: #000000;
            background-color: white;
        }
        /* Headers styling */
        h1 {
            color: #000000;
            font-size: 2.8rem !important;
            font-weight: 700 !important;
            margin-bottom: 1.5rem !important;
            text-align: center;
            padding: 1rem 0;
            border-bottom: 3px solid #1E3A8A;
        }
        h3, h4 {
            color: #000000;
            font-weight: 600 !important;
            font-size: 1.6rem !important;
            margin-top: 1.5rem !important;
        }
        /* Chat message styling */
        .chat-message {
            padding: 1.5rem;
            border-radius: 10px;
            margin: 1rem 0;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
            font-size: 1.1rem !important;
            line-height: 1.6 !important;
            font-family: "Arial", sans-serif !important;
            color: #000000 !important;
        }
        .user-message {
            background-color: #F3F4F6 !important;
        }
        .assistant-message {
            background-color: #EFF6FF !important;
        }
        /* Status indicators */
        .status-indicator {
            padding: 0.5rem 1rem;
            border-radius: 6px;
            font-weight: 500;
            font-size: 1.2rem;
            color: #000000;
        }
        .status-online {
            background-color: #DEF7EC;
            color: #03543F;
        }
        .status-offline {
            background-color: #FDE8E8;
            /* BUG FIX: was rgb(255, 255, 255) — white text on the pale pink
               badge was unreadable. Dark red mirrors .status-online's scheme. */
            color: #9B1C1C;
        }
        </style>
    """, unsafe_allow_html=True)
def clear_conversation_context():
    """Drop the RAG pipeline's conversation history and the session context
    memory, leaving st.session_state.chat_history intact for on-screen display."""
    active_pipeline = st.session_state.pipeline if 'pipeline' in st.session_state else None
    if active_pipeline:
        active_pipeline.conversation_history = []
    # Reset the context memory; the visible chat log is deliberately kept.
    st.session_state.context_memory = []
def initialize_pipeline():
    """Build an AcademicCalendarRAG pipeline from calendar.json.

    Seeds the pipeline's conversation history from any exchanges saved in
    st.session_state.context_memory. Returns the pipeline, or None (after
    showing an on-page error) when the data file is missing or setup fails.
    """
    try:
        # API key: environment first, then Streamlit secrets.
        api_key = os.getenv('OPENAI_API_KEY') or st.secrets['OPENAI_API_KEY']
        pipeline = AcademicCalendarRAG(create_default_config(api_key))
        try:
            with open("calendar.json", "r", encoding="utf-8") as f:
                pipeline.load_data(json.load(f))
        except FileNotFoundError:
            st.error("calendar.json not found. Please ensure the file exists in the same directory.")
            return None
        # Replay saved exchanges into the pipeline's history format.
        saved = st.session_state.context_memory if 'context_memory' in st.session_state else None
        if saved:
            replayed = []
            for exchange in saved:
                replayed.append({"role": "user", "content": exchange["query"]})
                replayed.append({"role": "assistant", "content": exchange["response"]})
            pipeline.conversation_history = replayed
        return pipeline
    except Exception as e:
        st.error(f"Error initializing pipeline: {str(e)}")
        return None
def load_qa_history():
    """Fetch qa_history.json from the project's GitHub repository.

    Returns the parsed list of QA entries, or [] when the file is missing,
    the request fails, or any error occurs (an on-page warning/error is shown).
    """
    try:
        # GitHub API configuration
        REPO_OWNER = "jirasaksaimekJijo"
        REPO_NAME = "swu-chat-bot-project"
        FILE_PATH = "qa_history.json"
        # SECURITY FIX: the personal access token was hard-coded in source
        # (a leaked credential). Read it from the environment / secrets instead.
        github_token = os.getenv("GITHUB_TOKEN") or st.secrets.get("GITHUB_TOKEN", "")
        # Set up GitHub API request (requests/base64/json are imported at module top).
        api_url = f"https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}/contents/{FILE_PATH}"
        headers = {"Accept": "application/vnd.github.v3+json"}
        if github_token:
            headers["Authorization"] = f"token {github_token}"
        response = requests.get(api_url, headers=headers)
        if response.status_code == 200:
            # The contents API returns the file body base64-encoded.
            content_data = response.json()
            file_content = base64.b64decode(content_data["content"]).decode("utf-8")
            return json.loads(file_content)
        st.warning(f"Failed to fetch QA history: {response.status_code} - {response.reason}")
        # Return empty list if file doesn't exist or can't be accessed
        return []
    except Exception as e:
        st.error(f"Error loading QA history from GitHub: {str(e)}")
        return []
def save_qa_history(history_entry):
    """Append a QA entry to qa_history.json on GitHub, with a local backup.

    Fetches the current file (if any) via the GitHub contents API, appends
    `history_entry` (its "answer" normalized to a plain string), writes a
    local backup copy, then PUTs the updated JSON back. Returns True on
    success, False otherwise.
    """
    try:
        # GitHub API configuration
        REPO_OWNER = "jirasaksaimekJijo"
        REPO_NAME = "swu-chat-bot-project"
        FILE_PATH = "qa_history.json"
        # SECURITY FIX: the personal access token was hard-coded in source
        # (a leaked credential). Read it from the environment / secrets instead.
        github_token = os.getenv("GITHUB_TOKEN") or st.secrets.get("GITHUB_TOKEN", "")
        if not github_token:
            st.warning("GITHUB_TOKEN is not configured; QA history push will likely fail.")
        api_url = f"https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}/contents/{FILE_PATH}"
        headers = {"Accept": "application/vnd.github.v3+json"}
        if github_token:
            headers["Authorization"] = f"token {github_token}"
        # Fetch existing file to get current content and SHA (SHA is required
        # by the contents API to update an existing file).
        response = requests.get(api_url, headers=headers)
        history_data = []
        sha = None
        if response.status_code == 200:
            content_data = response.json()
            sha = content_data["sha"]
            try:
                file_content = base64.b64decode(content_data["content"]).decode("utf-8")
                if file_content.strip():  # Make sure content is not empty
                    history_data = json.loads(file_content)
                if not isinstance(history_data, list):
                    st.warning("Existing history data is not a list. Initializing new list.")
                    history_data = []
            except Exception as e:
                st.warning(f"Error parsing existing history: {e}. Initializing new list.")
        elif response.status_code == 404:
            # File doesn't exist yet; it will be created by the PUT below.
            st.info("Creating new QA history file.")
        else:
            st.error(f"Failed to check existing history: {response.status_code} - {response.reason}")
        # Normalize the entry's "answer" field to a plain string before appending.
        if isinstance(history_entry, dict) and all(key in history_entry for key in ["timestamp", "query", "answer"]):
            if isinstance(history_entry["answer"], dict):
                history_entry["answer"] = history_entry["answer"].get('answer', str(history_entry["answer"]))
            elif hasattr(history_entry["answer"], 'content'):
                # Document-like object: keep only its text content.
                history_entry["answer"] = history_entry["answer"].content
            else:
                history_entry["answer"] = str(history_entry["answer"])
        history_data.append(history_entry)
        # Best-effort local backup; failure here must not abort the push.
        try:
            with open(Path("qa_history.json"), "w", encoding="utf-8") as f:
                json.dump(history_data, f, ensure_ascii=False, indent=2)
        except Exception as local_err:
            st.warning(f"Failed to save local backup: {local_err}")
        # Push the updated JSON back to GitHub (create or update).
        updated_content = json.dumps(history_data, ensure_ascii=False, indent=2)
        data = {
            "message": "Update QA history",
            "content": base64.b64encode(updated_content.encode('utf-8')).decode('utf-8'),
        }
        if sha:  # Updating an existing file requires its SHA.
            data["sha"] = sha
        update_response = requests.put(api_url, headers=headers, json=data)
        if update_response.status_code in [200, 201]:
            return True
        st.error(f"Failed to update QA history: {update_response.status_code} - {update_response.text}")
        return False
    except Exception as e:
        import traceback
        st.error(f"Error in save_qa_history: {str(e)}")
        st.error(f"Traceback: {traceback.format_exc()}")
        return False
def add_to_qa_history(query: str, answer: str):
    """Normalize an answer, wrap it with a timestamp and persist it.

    Delegates persistence to save_qa_history(). Returns the saved entry
    dict, or None when either input is empty or an error occurs.
    """
    try:
        if not (query and answer):
            st.warning("Empty query or answer detected, skipping history update")
            return None
        # Normalize the answer to a plain string regardless of input type.
        if isinstance(answer, dict):
            normalized = answer.get('answer', str(answer))
        elif hasattr(answer, 'content'):
            # Document-like object: keep only its text content.
            normalized = answer.content
        else:
            normalized = str(answer)
        # Timestamp shifted +5h — same local-time approximation used app-wide.
        entry = {
            "timestamp": (datetime.now() + timedelta(hours=5)).strftime("%Y-%m-%dT%H:%M:%S"),
            "query": query,
            "answer": normalized,
        }
        save_qa_history(entry)
        return entry
    except Exception as e:
        st.error(f"Error in add_to_qa_history: {str(e)}")
        return None
def add_to_history(role: str, message: str):
    """Append a chat message to the session history.

    On assistant turns the preceding user message is paired with the reply:
    the pair is persisted via add_to_qa_history() and also recorded in the
    context memory, which is capped at the ten most recent exchanges.
    """
    st.session_state.chat_history.append((role, message))
    # Only a completed exchange (user message followed by this reply) is saved.
    if role != "assistant" or len(st.session_state.chat_history) < 2:
        return
    user_query = st.session_state.chat_history[-2][1]
    add_to_qa_history(user_query, message)
    if 'context_memory' not in st.session_state:
        st.session_state.context_memory = []
    # Structured responses carry their text under the "answer" key.
    response_content = message["answer"] if isinstance(message, dict) and "answer" in message else message
    st.session_state.context_memory.append({
        "query": user_query,
        "response": response_content,
        "timestamp": (datetime.now() + timedelta(hours=5)).strftime("%Y-%m-%dT%H:%M:%S"),
    })
    # Keep only the last 10 exchanges to bound memory/latency.
    del st.session_state.context_memory[:-10]
def display_chat_history():
    """Render every (role, message) pair in st.session_state.chat_history.

    User messages render as plain bubbles. Assistant messages may be dicts
    carrying 'answer' plus optional 'documents' (reference docs shown in an
    expander); any other assistant payload is rendered as-is.
    """
    for role, content in st.session_state.chat_history:
        if role == "user":
            st.markdown(f"""
            <div class="chat-message user-message">
                <strong>🧑 คำถาม:</strong><br>
                {content}
            </div>
            """, unsafe_allow_html=True)
        else:
            if isinstance(content, dict):
                assistant_response = content.get('answer', '❌ ไม่มีข้อมูลคำตอบ')
                st.markdown(f"""
                <div class="chat-message assistant-message">
                    <strong>🤖 คำตอบ:</strong><br>
                    {assistant_response}
                </div>
                """, unsafe_allow_html=True)
                # Show reference documents like in main()
                if content.get('documents'):
                    with st.expander("📚 ข้อมูลอ้างอิง", expanded=False):
                        for i, doc in enumerate(content['documents'], 1):
                            # BUG FIX: card background was #000000 (black),
                            # making the dark default text unreadable. Use the
                            # same light grey as .user-message instead.
                            st.markdown(f"""
                            <div style="padding: 1rem; background-color: #F3F4F6; border-radius: 8px; margin: 0.5rem 0;">
                                <strong>เอกสารที่ {i}:</strong><br>
                                {doc.content}
                            </div>
                            """, unsafe_allow_html=True)
            else:
                st.markdown(f"""
                <div class="chat-message assistant-message">
                    <strong>🤖 คำตอบ:</strong><br>
                    {content}
                </div>
                """, unsafe_allow_html=True)
# Module-level safety net: make sure the context memory exists before any
# handler (which may run before main() finishes initializing) touches it.
if 'context_memory' not in st.session_state:
    st.session_state.context_memory = []
def handle_submit(user_query: str):
    """Process one user query end-to-end.

    Validates the input, records it in the chat history, builds a RAG-format
    conversation history (up to the last ~5 exchanges), enhances queries that
    refer back to earlier questions, runs the pipeline, and records the
    assistant response. Always clears the processing flag and reruns the app.
    """
    if not user_query:
        st.warning("⚠️ กรุณาระบุคำถาม")
        return
    user_query = user_query.strip()
    # Prevent duplicate submissions: skip when the latest history entry is
    # already this exact text.
    if st.session_state.chat_history and st.session_state.chat_history[-1][1] == user_query:
        return
    try:
        st.session_state.processing_query = True
        # Add user message to chat history
        add_to_history("user", user_query)
        # Convert the Streamlit chat history format to RAG format,
        # including up to 5 previous exchanges of context.
        rag_conversation_history = []
        history_to_include = (st.session_state.chat_history[-11:]
                              if len(st.session_state.chat_history) > 10
                              else st.session_state.chat_history)
        for role, content in history_to_include:
            rag_role = "user" if role == "user" else "assistant"
            # Structured responses carry their text under "answer".
            if isinstance(content, dict) and "answer" in content:
                rag_content = content["answer"]
            else:
                rag_content = content
            rag_conversation_history.append({"role": rag_role, "content": rag_content})
        if 'context_memory' not in st.session_state:
            st.session_state.context_memory = []
        with st.spinner("🔍 กำลังค้นหาคำตอบ..."):
            # Debug logging to verify how much context is being passed.
            print(f"Processing query with {len(rag_conversation_history)} context messages")
            # Special handling for questions that refer to earlier questions.
            reference_keywords = ["ก่อนหน้านี้", "ก่อนหน้า", "ที่ผ่านมา", "คำถามก่อนหน้า", "คำถามที่แล้ว",
                                  "previous", "earlier", "before", "last time", "last question"]
            is_reference_question = any(keyword in user_query.lower() for keyword in reference_keywords)
            # If this is a reference question, embed the previous user question
            # into the query so the model has explicit context.
            if is_reference_question and len(rag_conversation_history) >= 3:
                previous_questions = [msg["content"] for msg in rag_conversation_history[:-2]
                                      if msg["role"] == "user"]
                if previous_questions:
                    prev_question = previous_questions[-1]
                    enhanced_query = f"คำถามนี้อ้างอิงถึงคำถามก่อนหน้า '{prev_question}' โปรดพิจารณาบริบทนี้ในการตอบ: {user_query}"
                    print(f"Enhanced reference query: {enhanced_query}")
                    user_query = enhanced_query
            result = st.session_state.pipeline.process_query(
                query=user_query,
                conversation_history=rag_conversation_history
            )
            # Create response with same structure as main()
            response_dict = {
                "answer": result.get("answer", ""),
                "documents": result.get("relevant_docs", [])
            }
            # Update chat history; add_to_history() also records this exchange
            # in st.session_state.context_memory.
            # BUG FIX: the original appended a second, differently-timestamped
            # copy of the exchange to context_memory here, duplicating every
            # exchange in memory — removed.
            add_to_history("assistant", response_dict)
    except Exception as e:
        error_msg = f"❌ เกิดข้อผิดพลาด: {str(e)}"
        add_to_history("assistant", error_msg)
        st.error(f"Query processing error: {e}")
    finally:
        st.session_state.processing_query = False
        st.rerun()
def create_chat_input():
    """Render the chat input form: question box, submit button, and a
    combined clear-history-and-context button.

    Submit forwards the query to handle_submit(); clear wipes both the
    visible chat history and the conversation context, then reruns the app.
    """
    with st.form(key="chat_form", clear_on_submit=True):
        # Styled HTML label (the text_input's own label is left blank).
        st.markdown("""
        <label for="query_input" style="font-size: 1.2rem; font-weight: 600; margin-bottom: 1rem; display: block;">
            <span style="color: #ffffff; border-left: 4px solid #ffffff; padding-left: 0.8rem;">
                โปรดระบุคำถามเกี่ยวกับปฏิทินการศึกษา:
            </span>
        </label>
        """, unsafe_allow_html=True)
        query = st.text_input(
            "",
            key="query_input",
            placeholder="เช่น: วิชาเลือกมีอะไรบ้าง?"
        )
        # Two equal-width columns: submit on the left, clear on the right.
        col1, col2 = st.columns([5, 5])
        with col1:
            submitted = st.form_submit_button(
                "📤 ส่งคำถาม",
                type="primary",
                use_container_width=True
            )
        with col2:
            clear_all_button = st.form_submit_button(
                "🗑️ ล้างประวัติและบริบทสนทนา",
                type="secondary",
                use_container_width=True
            )
        if submitted:
            handle_submit(query)
        if clear_all_button:
            # Clear chat history
            st.session_state.chat_history = []
            # Clear conversation context
            clear_conversation_context()
            st.info("ล้างประวัติและบริบทสนทนาแล้ว")
            st.rerun()
def main():
    """Application entry point.

    Configures the Streamlit page, initializes session state and the RAG
    pipeline (once per session), then renders the header, the chat column
    and the info/status sidebar column.
    """
    # Page config
    st.set_page_config(
        page_title="Academic Calendar Assistant",
        page_icon="📅",
        layout="wide",
        initial_sidebar_state="collapsed"
    )
    # Load custom CSS
    load_custom_css()
    # Initialize session states
    if 'pipeline' not in st.session_state:
        st.session_state.pipeline = None
    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []
    if 'context_memory' not in st.session_state:
        st.session_state.context_memory = []
    if 'processing_query' not in st.session_state:
        st.session_state.processing_query = False
    # Initialize pipeline once per session (None means not yet created
    # or a previous initialization failed).
    if st.session_state.pipeline is None:
        with st.spinner("กำลังเริ่มต้นระบบ..."):
            st.session_state.pipeline = initialize_pipeline()
    # Header
    st.markdown("""
        <div style="text-align: center; padding: 2rem 0;">
            <h1>🎓 ผู้ช่วยค้นหาข้อมูลหลักสูตรและปฏิทินการศึกษา</h1>
            <p style="font-size: 1.2rem; color: #666;">บัณฑิตวิทยาลัย มหาวิทยาลัยศรีนครินทรวิโรฒ</p>
        </div>
    """, unsafe_allow_html=True)
    # Layout: 70% chat column, 30% info column.
    chat_col, info_col = st.columns([7, 3])
    with chat_col:
        display_chat_history()
        create_chat_input()
    # Info column
    with info_col:
        st.markdown("""
        <div style="background-color: #F9FAFB; padding: 1.5rem; border-radius: 12px; margin-bottom: 2rem;">
            <h3 style="color: #1E3A8A;">ℹ️ เกี่ยวกับระบบ</h3>
            <p style="color: #000000;">
                ระบบนี้ใช้เทคโนโลยี <strong>RAG (Retrieval-Augmented Generation)</strong>
                ในการค้นหาและตอบคำถามเกี่ยวกับหลักสูตรและปฏิทินการศึกษา
            </p>
            <h4 style="color: #1E3A8A; margin-top: 1rem;">สามารถสอบถามข้อมูลเกี่ยวกับ:</h4>
            <ul style="list-style-type: none; padding-left: 0;">
                <li style="color: #000000; margin-bottom: 0.5rem;">📚 รายวิชาในหลักสูตร</li>
                <li style="color: #000000; margin-bottom: 0.5rem;">📝 การลงทะเบียนเรียน</li>
                <li style="color: #000000; margin-bottom: 0.5rem;">📅 กำหนดการต่างๆ</li>
                <li style="color: #000000; margin-bottom: 0.5rem;">💰 ค่าธรรมเนียมการศึกษา</li>
                <li style="color: #000000; margin-bottom: 0.5rem;">📋 ขั้นตอนการสมัคร</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)
        # System status card: time shown with the app-wide +5h offset;
        # online/offline badge reflects whether the pipeline initialized.
        st.markdown("""
        <div style="background-color: #f9fafb; padding: 1.5rem; border-radius: 12px;">
            <h3 style="color: #1E3A8A;">🔄 สถานะระบบ</h3>
            <div style="margin-top: 1rem;">
                <p><strong style="color: #000000;">⏰ เวลาปัจจุบัน:</strong><br>
                <span style="color: #000000;">{}</span></p>
                <p><strong style="color: #000000;">📡 สถานะระบบ:</strong><br>
                <span class="status-indicator {}">
                    {} {}
                </span></p>
            </div>
        </div>
        """.format(
            (datetime.now() + timedelta(hours=5)).strftime('%Y-%m-%d %H:%M:%S'),
            "status-online" if st.session_state.pipeline else "status-offline",
            "🟢" if st.session_state.pipeline else "🔴",
            "พร้อมใช้งาน" if st.session_state.pipeline else "ไม่พร้อมใช้งาน"
        ), unsafe_allow_html=True)
# Script entry point.
if __name__ == "__main__":
    main()