Spaces:

JirasakJo
/

Questions_Graduate_Studies_Calendar_2024

Sleeping

App Files Files Community

JirasakJo committed on Feb 24

Commit

b1922d8

verified ·

1 Parent(s): 1f3d6e8

Update calendar_rag.py

Browse files

Files changed (1) hide show

calendar_rag.py +69 -18

calendar_rag.py CHANGED Viewed

@@ -1279,7 +1279,7 @@ class HybridDocumentStore:
         return sorted_docs[:top_k]
 class ResponseGenerator:
-    """Generate responses with better context utilization"""
     def __init__(self, config: PipelineConfig):
         self.generator = OpenAIGenerator(
@@ -1288,9 +1288,20 @@ class ResponseGenerator:
         )
         self.prompt_builder = PromptBuilder(
             template="""
-            คุณเป็นที่ปรึกษาทางวิชาการ กรุณาตอบคำถามต่อไปนี้โดยใช้ข้อมูลจากเอกสารที่ให้มาเท่านั้น
-            คำถาม: {{query}}
             ข้อมูลที่เกี่ยวข้อง:
             {% for doc in context %}
@@ -1307,6 +1318,9 @@ class ResponseGenerator:
             4. จัดรูปแบบคำตอบให้อ่านง่าย ใช้หัวข้อและย่อหน้าตามความเหมาะสม
             5. สำหรับคำถามเกี่ยวกับข้อกำหนดภาษาอังกฤษหรือขั้นตอนการสมัคร ให้อธิบายข้อมูลอย่างละเอียด
             6. ใส่ข้อความ "หากมีข้อสงสัยเพิ่มเติม สามารถสอบถามได้" ท้ายคำตอบเสมอ
             กรุณาตอบเป็นภาษาไทย:
             """
@@ -1315,14 +1329,15 @@ class ResponseGenerator:
     def generate_response(self,
                          query: str,
                          documents: List[Document],
-                         query_info: Dict[str, Any]) -> str:
-        """Generate response using retrieved documents"""
         try:
-            print(query_info)
             result = self.prompt_builder.run(
                 query=query,
                 context=documents,
-                format=query_info["response_format"]
             )
             response = self.generator.run(prompt=result["prompt"])
             return response["replies"][0]
@@ -1509,16 +1524,22 @@ class AdvancedQueryProcessor:
             return self._get_default_analysis(query)
 class AcademicCalendarRAG:
-    """Enhanced RAG system for academic calendar and program information"""
     def __init__(self, config: PipelineConfig):
         self.config = config
-        self.document_store = HybridDocumentStore(config)  # Use the new hybrid store
         self.query_processor = AdvancedQueryProcessor(config)
         self.response_generator = ResponseGenerator(config)
         self.data_processor = CalendarDataProcessor()
         # Initialize data containers
         self.calendar_events = []
         self.program_details = []
@@ -1527,6 +1548,13 @@ class AcademicCalendarRAG:
         self.study_plans = []
         self.tuition_fees = []
     def load_data(self, json_data: Dict):
         """Load and process all data sources"""
         try:
@@ -1556,8 +1584,27 @@ class AcademicCalendarRAG:
             logger.error(f"Error loading data: {str(e)}")
             raise
-    def process_query(self, query: str) -> Dict[str, Any]:
-        """Process user query using hybrid retrieval with dynamic retries."""
         max_attempts = 4  # Allow up to 4 attempts
         attempt = 0
         weight_values = [0.3, 0.7, 0.3, 0.7]  # Switching semantic retrieval weight
@@ -1565,19 +1612,19 @@ class AcademicCalendarRAG:
         while attempt < max_attempts:
             attempt += 1
             try:
-                # Analyze query
                 if attempt <= 2:
-                    query_info = self.query_processor.process_query(query)  # Use normal query analysis for first 2 attempts
                 else:
-                    query_info = self.query_processor._get_default_analysis(query)  # Use default analysis for last 2 attempts
                     logger.info(f"Retrying query processing (attempt {attempt}) with default analysis")
-                weight_semantic = weight_values[attempt - 1]  # Adjust weight for semantic search dynamically
                 # Get relevant documents using hybrid search
                 logger.info(f"Attempt {attempt}: Searching with weight_semantic={weight_semantic}")
                 documents = self.document_store.hybrid_search(
-                    query=query,
                     event_type=query_info.get("event_type"),
                     detail_type=query_info.get("detail_type"),
                     semester=query_info.get("semester"),
@@ -1585,17 +1632,21 @@ class AcademicCalendarRAG:
                     weight_semantic=weight_semantic
                 )
-                # Generate response
                 response = self.response_generator.generate_response(
                     query=query,
                     documents=documents,
-                    query_info=query_info
                 ).strip()
                 # If response indicates no relevant information, retry with adjusted approach
                 if "ขออภัย ไม่พบข้อมูลที่เกี่ยวข้อง" in response and attempt < max_attempts:
                     continue  # Try again with new weight or default analysis
                 return {
                     "query": query,
                     "answer": response,

         return sorted_docs[:top_k]
 class ResponseGenerator:
+    """Generate responses with conversation context awareness"""
     def __init__(self, config: PipelineConfig):
         self.generator = OpenAIGenerator(
         )
         self.prompt_builder = PromptBuilder(
             template="""
+            คุณเป็นที่ปรึกษาทางวิชาการ กรุณาตอบคำถามต่อไปนี้โดยใช้ข้อมูลจากเอกสารที่ให้มาและพิจารณาบริบทจากประวัติการสนทนา
+            {% if conversation_history %}
+            ประวัติการสนทนา:
+            {% for message in conversation_history %}
+            {% if message.role == 'user' %}
+            ผู้ใช้: {{ message.content }}
+            {% else %}
+            ที่ปรึกษา: {{ message.content }}
+            {% endif %}
+            {% endfor %}
+            {% endif %}
+            คำถามปัจจุบัน: {{query}}
             ข้อมูลที่เกี่ยวข้อง:
             {% for doc in context %}
             4. จัดรูปแบบคำตอบให้อ่านง่าย ใช้หัวข้อและย่อหน้าตามความเหมาะสม
             5. สำหรับคำถามเกี่ยวกับข้อกำหนดภาษาอังกฤษหรือขั้นตอนการสมัคร ให้อธิบายข้อมูลอย่างละเอียด
             6. ใส่ข้อความ "หากมีข้อสงสัยเพิ่มเติม สามารถสอบถามได้" ท้ายคำตอบเสมอ
+            7. คำนึงถึงประวัติการสนทนาและให้คำตอบที่ต่อเนื่องกับบทสนทนาก่อนหน้า
+            8. หากคำถามอ้างอิงถึงข้อมูลในบทสนทนาก่อนหน้า (เช่น "แล้วอันนั้นล่ะ", "มีอะไรอีกบ้าง") ให้พิจารณาบริบทและตอบคำถามอย่างตรงประเด็น
+            9. กรณีคำถามมีความไม่ชัดเจน ใช้ประวัติการสนทนาเพื่อเข้าใจบริบทของคำถาม
             กรุณาตอบเป็นภาษาไทย:
             """
     def generate_response(self,
                          query: str,
                          documents: List[Document],
+                         query_info: Dict[str, Any],
+                         conversation_history: List[Dict[str, str]] = None) -> str:
+        """Generate response using retrieved documents and conversation history"""
         try:
             result = self.prompt_builder.run(
                 query=query,
                 context=documents,
+                format=query_info.get("response_format", "detailed"),
+                conversation_history=conversation_history or []
             )
             response = self.generator.run(prompt=result["prompt"])
             return response["replies"][0]
             return self._get_default_analysis(query)
+# AcademicCalendarRAG keeps a bounded conversation history so follow-up questions can be answered in context
 class AcademicCalendarRAG:
+    """Enhanced RAG system for academic calendar and program information with conversation memory"""
     def __init__(self, config: PipelineConfig):
         self.config = config
+        self.document_store = HybridDocumentStore(config)
         self.query_processor = AdvancedQueryProcessor(config)
         self.response_generator = ResponseGenerator(config)
         self.data_processor = CalendarDataProcessor()
+        # Initialize conversation memory
+        self.conversation_history = []
+        self.max_history_length = 5  # Keep last 5 exchanges (10 messages)
         # Initialize data containers
         self.calendar_events = []
         self.program_details = []
         self.study_plans = []
         self.tuition_fees = []
+    def add_to_conversation(self, role: str, content: str):
+        """Add a message to the conversation history"""
+        self.conversation_history.append({"role": role, "content": content})
+        # Limit history length to prevent context overflow
+        if len(self.conversation_history) > self.max_history_length * 2:  # Each exchange is 2 messages
+            self.conversation_history = self.conversation_history[-(self.max_history_length * 2):]
     def load_data(self, json_data: Dict):
         """Load and process all data sources"""
         try:
             logger.error(f"Error loading data: {str(e)}")
             raise
+    def process_query(self, query: str, conversation_history=None) -> Dict[str, Any]:
+        """Process user query using conversation history and hybrid retrieval."""
+        # Use provided conversation history or the internal history
+        if conversation_history is not None:
+            self.conversation_history = conversation_history
+        # Add the current query to history
+        self.add_to_conversation("user", query)
+        # Create a context-enhanced query by including relevant previous exchanges
+        query_with_context = query
+        if self.conversation_history and len(self.conversation_history) > 1:
+            # Extract previous exchanges to provide context (up to 2 previous exchanges)
+            prev_exchanges = self.conversation_history[:-1]
+            if len(prev_exchanges) > 4:  # Limit to last 2 exchanges (4 messages)
+                prev_exchanges = prev_exchanges[-4:]
+            context_str = "\n".join([f"{msg['role']}: {msg['content']}" for msg in prev_exchanges])
+            query_with_context = f"Previous conversation:\n{context_str}\n\nCurrent question: {query}"
+        # Process with conversation context
         max_attempts = 4  # Allow up to 4 attempts
         attempt = 0
         weight_values = [0.3, 0.7, 0.3, 0.7]  # Switching semantic retrieval weight
         while attempt < max_attempts:
             attempt += 1
             try:
+                # Analyze query - use context-enhanced query for better understanding
                 if attempt <= 2:
+                    query_info = self.query_processor.process_query(query_with_context if attempt == 1 else query)
                 else:
+                    query_info = self.query_processor._get_default_analysis(query)
                     logger.info(f"Retrying query processing (attempt {attempt}) with default analysis")
+                weight_semantic = weight_values[attempt - 1]
                 # Get relevant documents using hybrid search
                 logger.info(f"Attempt {attempt}: Searching with weight_semantic={weight_semantic}")
                 documents = self.document_store.hybrid_search(
+                    query=query_with_context if attempt == 1 else query,
                     event_type=query_info.get("event_type"),
                     detail_type=query_info.get("detail_type"),
                     semester=query_info.get("semester"),
                     weight_semantic=weight_semantic
                 )
+                # Generate response with conversation context
                 response = self.response_generator.generate_response(
                     query=query,
                     documents=documents,
+                    query_info=query_info,
+                    conversation_history=self.conversation_history
                 ).strip()
                 # If response indicates no relevant information, retry with adjusted approach
                 if "ขออภัย ไม่พบข้อมูลที่เกี่ยวข้อง" in response and attempt < max_attempts:
                     continue  # Try again with new weight or default analysis
+                # Add the response to conversation history
+                self.add_to_conversation("assistant", response)
                 return {
                     "query": query,
                     "answer": response,