Update calendar_rag.py
Browse files- calendar_rag.py +69 -18
calendar_rag.py
CHANGED
@@ -1279,7 +1279,7 @@ class HybridDocumentStore:
|
|
1279 |
return sorted_docs[:top_k]
|
1280 |
|
1281 |
class ResponseGenerator:
|
1282 |
-
"""Generate responses with
|
1283 |
|
1284 |
def __init__(self, config: PipelineConfig):
|
1285 |
self.generator = OpenAIGenerator(
|
@@ -1288,9 +1288,20 @@ class ResponseGenerator:
|
|
1288 |
)
|
1289 |
self.prompt_builder = PromptBuilder(
|
1290 |
template="""
|
1291 |
-
คุณเป็นที่ปรึกษาทางวิชาการ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1292 |
|
1293 |
-
|
1294 |
|
1295 |
ข้อมูลที่เกี่ยวข้อง:
|
1296 |
{% for doc in context %}
|
@@ -1307,6 +1318,9 @@ class ResponseGenerator:
|
|
1307 |
4. จัดรูปแบบคำตอบให้อ่านง่าย ใช้หัวข้อและย่อหน้าตามความเหมาะสม
|
1308 |
5. สำหรับคำถามเกี่ยวกับข้อกำหนดภาษาอังกฤษหรือขั้นตอนการสมัคร ให้อธิบายข้อมูลอย่างละเอียด
|
1309 |
6. ใส่ข้อความ "หากมีข้อสงสัยเพิ่มเติม สามารถสอบถามได้" ท้ายคำตอบเสมอ
|
|
|
|
|
|
|
1310 |
|
1311 |
กรุณาตอบเป็นภาษาไทย:
|
1312 |
"""
|
@@ -1315,14 +1329,15 @@ class ResponseGenerator:
|
|
1315 |
def generate_response(self,
|
1316 |
query: str,
|
1317 |
documents: List[Document],
|
1318 |
-
query_info: Dict[str, Any]
|
1319 |
-
|
|
|
1320 |
try:
|
1321 |
-
print(query_info)
|
1322 |
result = self.prompt_builder.run(
|
1323 |
query=query,
|
1324 |
context=documents,
|
1325 |
-
format=query_info
|
|
|
1326 |
)
|
1327 |
response = self.generator.run(prompt=result["prompt"])
|
1328 |
return response["replies"][0]
|
@@ -1509,16 +1524,22 @@ class AdvancedQueryProcessor:
|
|
1509 |
return self._get_default_analysis(query)
|
1510 |
|
1511 |
|
|
|
|
|
1512 |
class AcademicCalendarRAG:
|
1513 |
-
"""Enhanced RAG system for academic calendar and program information"""
|
1514 |
|
1515 |
def __init__(self, config: PipelineConfig):
|
1516 |
self.config = config
|
1517 |
-
self.document_store = HybridDocumentStore(config)
|
1518 |
self.query_processor = AdvancedQueryProcessor(config)
|
1519 |
self.response_generator = ResponseGenerator(config)
|
1520 |
self.data_processor = CalendarDataProcessor()
|
1521 |
|
|
|
|
|
|
|
|
|
1522 |
# Initialize data containers
|
1523 |
self.calendar_events = []
|
1524 |
self.program_details = []
|
@@ -1527,6 +1548,13 @@ class AcademicCalendarRAG:
|
|
1527 |
self.study_plans = []
|
1528 |
self.tuition_fees = []
|
1529 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1530 |
def load_data(self, json_data: Dict):
|
1531 |
"""Load and process all data sources"""
|
1532 |
try:
|
@@ -1556,8 +1584,27 @@ class AcademicCalendarRAG:
|
|
1556 |
logger.error(f"Error loading data: {str(e)}")
|
1557 |
raise
|
1558 |
|
1559 |
-
def process_query(self, query: str) -> Dict[str, Any]:
|
1560 |
-
"""Process user query using
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1561 |
max_attempts = 4 # Allow up to 4 attempts
|
1562 |
attempt = 0
|
1563 |
weight_values = [0.3, 0.7, 0.3, 0.7] # Switching semantic retrieval weight
|
@@ -1565,19 +1612,19 @@ class AcademicCalendarRAG:
|
|
1565 |
while attempt < max_attempts:
|
1566 |
attempt += 1
|
1567 |
try:
|
1568 |
-
# Analyze query
|
1569 |
if attempt <= 2:
|
1570 |
-
query_info = self.query_processor.process_query(
|
1571 |
else:
|
1572 |
-
query_info = self.query_processor._get_default_analysis(query)
|
1573 |
logger.info(f"Retrying query processing (attempt {attempt}) with default analysis")
|
1574 |
|
1575 |
-
weight_semantic = weight_values[attempt - 1]
|
1576 |
|
1577 |
# Get relevant documents using hybrid search
|
1578 |
logger.info(f"Attempt {attempt}: Searching with weight_semantic={weight_semantic}")
|
1579 |
documents = self.document_store.hybrid_search(
|
1580 |
-
query=query,
|
1581 |
event_type=query_info.get("event_type"),
|
1582 |
detail_type=query_info.get("detail_type"),
|
1583 |
semester=query_info.get("semester"),
|
@@ -1585,17 +1632,21 @@ class AcademicCalendarRAG:
|
|
1585 |
weight_semantic=weight_semantic
|
1586 |
)
|
1587 |
|
1588 |
-
# Generate response
|
1589 |
response = self.response_generator.generate_response(
|
1590 |
query=query,
|
1591 |
documents=documents,
|
1592 |
-
query_info=query_info
|
|
|
1593 |
).strip()
|
1594 |
|
1595 |
# If response indicates no relevant information, retry with adjusted approach
|
1596 |
if "ขออภัย ไม่พบข้อมูลที่เกี่ยวข้อง" in response and attempt < max_attempts:
|
1597 |
continue # Try again with new weight or default analysis
|
1598 |
|
|
|
|
|
|
|
1599 |
return {
|
1600 |
"query": query,
|
1601 |
"answer": response,
|
|
|
1279 |
return sorted_docs[:top_k]
|
1280 |
|
1281 |
class ResponseGenerator:
|
1282 |
+
"""Generate responses with conversation context awareness"""
|
1283 |
|
1284 |
def __init__(self, config: PipelineConfig):
|
1285 |
self.generator = OpenAIGenerator(
|
|
|
1288 |
)
|
1289 |
self.prompt_builder = PromptBuilder(
|
1290 |
template="""
|
1291 |
+
คุณเป็นที่ปรึกษาทางวิชาการ กรุณาตอบคำถามต่อไปนี้โดยใช้ข้อมูลจากเอกสารที่ให้มาและพิจารณาบริบทจากประวัติการสนทนา
|
1292 |
+
|
1293 |
+
{% if conversation_history %}
|
1294 |
+
ประวัติการสนทนา:
|
1295 |
+
{% for message in conversation_history %}
|
1296 |
+
{% if message.role == 'user' %}
|
1297 |
+
ผู้ใช้: {{ message.content }}
|
1298 |
+
{% else %}
|
1299 |
+
ที่ปรึกษา: {{ message.content }}
|
1300 |
+
{% endif %}
|
1301 |
+
{% endfor %}
|
1302 |
+
{% endif %}
|
1303 |
|
1304 |
+
คำถามปัจจุบัน: {{query}}
|
1305 |
|
1306 |
ข้อมูลที่เกี่ยวข้อง:
|
1307 |
{% for doc in context %}
|
|
|
1318 |
4. จัดรูปแบบคำตอบให้อ่านง่าย ใช้หัวข้อและย่อหน้าตามความเหมาะสม
|
1319 |
5. สำหรับคำถามเกี่ยวกับข้อกำหนดภาษาอังกฤษหรือขั้นตอนการสมัคร ให้อธิบายข้อมูลอย่างละเอียด
|
1320 |
6. ใส่ข้อความ "หากมีข้อสงสัยเพิ่มเติม สามารถสอบถามได้" ท้ายคำตอบเสมอ
|
1321 |
+
7. คำนึงถึงประวัติการสนทนาและให้คำตอบที่ต่อเนื่องกับบทสนทนาก่อนหน้า
|
1322 |
+
8. หากคำถามอ้างอิงถึงข้อมูลในบทสนทนาก่อนหน้า (เช่น "แล้วอันนั้นล่ะ", "มีอะไรอีกบ้าง") ให้พิจารณาบริบทและตอบคำถามอย่างตรงประเด็น
|
1323 |
+
9. กรณีคำถามมีความไม่ชัดเจน ใช้ประวัติการสนทนาเพื่อเข้าใจบริบทของคำถาม
|
1324 |
|
1325 |
กรุณาตอบเป็นภาษาไทย:
|
1326 |
"""
|
|
|
1329 |
def generate_response(self,
|
1330 |
query: str,
|
1331 |
documents: List[Document],
|
1332 |
+
query_info: Dict[str, Any],
|
1333 |
+
conversation_history: List[Dict[str, str]] = None) -> str:
|
1334 |
+
"""Generate response using retrieved documents and conversation history"""
|
1335 |
try:
|
|
|
1336 |
result = self.prompt_builder.run(
|
1337 |
query=query,
|
1338 |
context=documents,
|
1339 |
+
format=query_info.get("response_format", "detailed"),
|
1340 |
+
conversation_history=conversation_history or []
|
1341 |
)
|
1342 |
response = self.generator.run(prompt=result["prompt"])
|
1343 |
return response["replies"][0]
|
|
|
1524 |
return self._get_default_analysis(query)
|
1525 |
|
1526 |
|
1527 |
+
# AcademicCalendarRAG keeps a rolling conversation history so follow-up questions can use earlier turns.
|
1528 |
+
|
1529 |
class AcademicCalendarRAG:
|
1530 |
+
"""Enhanced RAG system for academic calendar and program information with conversation memory"""
|
1531 |
|
1532 |
def __init__(self, config: PipelineConfig):
|
1533 |
self.config = config
|
1534 |
+
self.document_store = HybridDocumentStore(config)
|
1535 |
self.query_processor = AdvancedQueryProcessor(config)
|
1536 |
self.response_generator = ResponseGenerator(config)
|
1537 |
self.data_processor = CalendarDataProcessor()
|
1538 |
|
1539 |
+
# Initialize conversation memory
|
1540 |
+
self.conversation_history = []
|
1541 |
+
self.max_history_length = 5 # Keep last 5 exchanges (10 messages)
|
1542 |
+
|
1543 |
# Initialize data containers
|
1544 |
self.calendar_events = []
|
1545 |
self.program_details = []
|
|
|
1548 |
self.study_plans = []
|
1549 |
self.tuition_fees = []
|
1550 |
|
1551 |
+
def add_to_conversation(self, role: str, content: str):
    """Append one message to the conversation history and trim old entries.

    The message is stored as ``{"role": role, "content": content}`` at the
    end of ``self.conversation_history``. Afterwards the history is capped
    at ``self.max_history_length * 2`` messages (one exchange is counted as
    two messages), discarding the oldest entries first.

    Args:
        role: Speaker label stored under the ``"role"`` key.
        content: Message text stored under the ``"content"`` key.
    """
    self.conversation_history.append({"role": role, "content": content})

    # Each exchange is 2 messages; clamp so a negative setting cannot
    # produce a nonsensical limit.
    max_messages = max(0, self.max_history_length * 2)
    overflow = len(self.conversation_history) - max_messages
    if overflow > 0:
        # Drop from the front by count instead of slicing with a negative
        # index: ``history[-0:]`` is the whole list, so a zero limit would
        # otherwise never trim anything.
        self.conversation_history = self.conversation_history[overflow:]
|
1557 |
+
|
1558 |
def load_data(self, json_data: Dict):
|
1559 |
"""Load and process all data sources"""
|
1560 |
try:
|
|
|
1584 |
logger.error(f"Error loading data: {str(e)}")
|
1585 |
raise
|
1586 |
|
1587 |
+
def process_query(self, query: str, conversation_history=None) -> Dict[str, Any]:
|
1588 |
+
"""Process user query using conversation history and hybrid retrieval."""
|
1589 |
+
# Use provided conversation history or the internal history
|
1590 |
+
if conversation_history is not None:
|
1591 |
+
self.conversation_history = conversation_history
|
1592 |
+
|
1593 |
+
# Add the current query to history
|
1594 |
+
self.add_to_conversation("user", query)
|
1595 |
+
|
1596 |
+
# Create a context-enhanced query by including relevant previous exchanges
|
1597 |
+
query_with_context = query
|
1598 |
+
if self.conversation_history and len(self.conversation_history) > 1:
|
1599 |
+
# Extract previous exchanges to provide context (up to 2 previous exchanges)
|
1600 |
+
prev_exchanges = self.conversation_history[:-1]
|
1601 |
+
if len(prev_exchanges) > 4: # Limit to last 2 exchanges (4 messages)
|
1602 |
+
prev_exchanges = prev_exchanges[-4:]
|
1603 |
+
|
1604 |
+
context_str = "\n".join([f"{msg['role']}: {msg['content']}" for msg in prev_exchanges])
|
1605 |
+
query_with_context = f"Previous conversation:\n{context_str}\n\nCurrent question: {query}"
|
1606 |
+
|
1607 |
+
# Process with conversation context
|
1608 |
max_attempts = 4 # Allow up to 4 attempts
|
1609 |
attempt = 0
|
1610 |
weight_values = [0.3, 0.7, 0.3, 0.7] # Switching semantic retrieval weight
|
|
|
1612 |
while attempt < max_attempts:
|
1613 |
attempt += 1
|
1614 |
try:
|
1615 |
+
# Analyze query - use context-enhanced query for better understanding
|
1616 |
if attempt <= 2:
|
1617 |
+
query_info = self.query_processor.process_query(query_with_context if attempt == 1 else query)
|
1618 |
else:
|
1619 |
+
query_info = self.query_processor._get_default_analysis(query)
|
1620 |
logger.info(f"Retrying query processing (attempt {attempt}) with default analysis")
|
1621 |
|
1622 |
+
weight_semantic = weight_values[attempt - 1]
|
1623 |
|
1624 |
# Get relevant documents using hybrid search
|
1625 |
logger.info(f"Attempt {attempt}: Searching with weight_semantic={weight_semantic}")
|
1626 |
documents = self.document_store.hybrid_search(
|
1627 |
+
query=query_with_context if attempt == 1 else query,
|
1628 |
event_type=query_info.get("event_type"),
|
1629 |
detail_type=query_info.get("detail_type"),
|
1630 |
semester=query_info.get("semester"),
|
|
|
1632 |
weight_semantic=weight_semantic
|
1633 |
)
|
1634 |
|
1635 |
+
# Generate response with conversation context
|
1636 |
response = self.response_generator.generate_response(
|
1637 |
query=query,
|
1638 |
documents=documents,
|
1639 |
+
query_info=query_info,
|
1640 |
+
conversation_history=self.conversation_history
|
1641 |
).strip()
|
1642 |
|
1643 |
# If response indicates no relevant information, retry with adjusted approach
|
1644 |
if "ขออภัย ไม่พบข้อมูลที่เกี่ยวข้อง" in response and attempt < max_attempts:
|
1645 |
continue # Try again with new weight or default analysis
|
1646 |
|
1647 |
+
# Add the response to conversation history
|
1648 |
+
self.add_to_conversation("assistant", response)
|
1649 |
+
|
1650 |
return {
|
1651 |
"query": query,
|
1652 |
"answer": response,
|