Update calendar_rag.py
Browse files
- calendar_rag.py +32 -8
    	
        calendar_rag.py
    CHANGED
    
    | @@ -447,7 +447,35 @@ class PipelineConfig: | |
| 447 | 
             
                localization: LocalizationConfig = field(default_factory=LocalizationConfig)
         | 
| 448 |  | 
| 449 | 
             
            def create_default_config(api_key: str) -> PipelineConfig:
         | 
| 450 | 
            -
                 | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 451 |  | 
| 452 | 
             
            class CalendarDataProcessor:
         | 
| 453 | 
             
                """Process and structure calendar data from the new raw-data.json format"""
         | 
| @@ -1384,7 +1412,7 @@ class AcademicCalendarRAG: | |
| 1384 | 
             
                            event_type=query_info.get("event_type"),
         | 
| 1385 | 
             
                            semester=query_info.get("semester"),
         | 
| 1386 | 
             
                            top_k=self.config.retriever.top_k,
         | 
| 1387 | 
            -
                            weight_semantic= | 
| 1388 | 
             
                        )
         | 
| 1389 |  | 
| 1390 | 
             
                        # Add fee information for fee-related queries
         | 
| @@ -1422,10 +1450,6 @@ class AcademicCalendarRAG: | |
| 1422 |  | 
| 1423 | 
             
            #         # Create config with hybrid retrieval settings
         | 
| 1424 | 
             
            #         config = create_default_config(openai_api_key)
         | 
| 1425 | 
            -
            #         config.localization.enable_thai_normalization = True
         | 
| 1426 | 
            -
            #         config.retriever.top_k = 5
         | 
| 1427 | 
            -
            #         config.model.temperature = 0.3
         | 
| 1428 | 
            -
                    
         | 
| 1429 | 
             
            #         pipeline = AcademicCalendarRAG(config)
         | 
| 1430 |  | 
| 1431 | 
             
            #         # Load and process data
         | 
| @@ -1435,13 +1459,13 @@ class AcademicCalendarRAG: | |
| 1435 | 
             
            #         pipeline.load_data(raw_data)
         | 
| 1436 |  | 
| 1437 | 
             
            #         # Test queries with different semantic weights
         | 
| 1438 | 
            -
            #         queries = [" | 
| 1439 |  | 
| 1440 | 
             
            #         print("=" * 80)
         | 
| 1441 |  | 
| 1442 | 
             
            #         for query in queries:
         | 
| 1443 | 
             
            #             print(f"\nQuery: {query}")
         | 
| 1444 | 
            -
            #             result = pipeline.process_query(query | 
| 1445 | 
             
            #             print(f"Answer: {result['answer']}")
         | 
| 1446 | 
             
            #             print("-" * 40)
         | 
| 1447 |  | 
|  | |
| 447 | 
             
                localization: LocalizationConfig = field(default_factory=LocalizationConfig)
         | 
| 448 |  | 
| 449 | 
             
            def create_default_config(api_key: str) -> PipelineConfig:
         | 
| 450 | 
            +
                """
         | 
| 451 | 
            +
                Create a default pipeline configuration with optimized settings for Thai language processing.
         | 
| 452 | 
            +
                
         | 
| 453 | 
            +
                Args:
         | 
| 454 | 
            +
                    api_key (str): OpenAI API key
         | 
| 455 | 
            +
                    
         | 
| 456 | 
            +
                Returns:
         | 
| 457 | 
            +
                    PipelineConfig: Configured pipeline settings
         | 
| 458 | 
            +
                """
         | 
| 459 | 
            +
                return PipelineConfig(
         | 
| 460 | 
            +
                    model=ModelConfig(
         | 
| 461 | 
            +
                        openai_api_key=api_key,
         | 
| 462 | 
            +
                        temperature=0.3  # Lower temperature for more focused responses
         | 
| 463 | 
            +
                    ),
         | 
| 464 | 
            +
                    retriever=RetrieverConfig(
         | 
| 465 | 
            +
                        top_k=5  # Optimal number of documents to retrieve
         | 
| 466 | 
            +
                    ),
         | 
| 467 | 
            +
                    cache=CacheConfig(
         | 
| 468 | 
            +
                        enabled=True,
         | 
| 469 | 
            +
                        cache_dir=Path("./cache"),
         | 
| 470 | 
            +
                        ttl=86400  # 24 hour cache
         | 
| 471 | 
            +
                    ),
         | 
| 472 | 
            +
                    processing=ProcessingConfig(
         | 
| 473 | 
            +
                        batch_size=32  # Default batch size for processing
         | 
| 474 | 
            +
                    ),
         | 
| 475 | 
            +
                    localization=LocalizationConfig(
         | 
| 476 | 
            +
                        enable_thai_normalization=True  # Enable Thai text normalization
         | 
| 477 | 
            +
                    )
         | 
| 478 | 
            +
                )
         | 
| 479 |  | 
| 480 | 
             
            class CalendarDataProcessor:
         | 
| 481 | 
             
                """Process and structure calendar data from the new raw-data.json format"""
         | 
|  | |
| 1412 | 
             
                            event_type=query_info.get("event_type"),
         | 
| 1413 | 
             
                            semester=query_info.get("semester"),
         | 
| 1414 | 
             
                            top_k=self.config.retriever.top_k,
         | 
| 1415 | 
            +
                            weight_semantic= 0.3
         | 
| 1416 | 
             
                        )
         | 
| 1417 |  | 
| 1418 | 
             
                        # Add fee information for fee-related queries
         | 
|  | |
| 1450 |  | 
| 1451 | 
             
            #         # Create config with hybrid retrieval settings
         | 
| 1452 | 
             
            #         config = create_default_config(openai_api_key)
         | 
|  | |
|  | |
|  | |
|  | |
| 1453 | 
             
            #         pipeline = AcademicCalendarRAG(config)
         | 
| 1454 |  | 
| 1455 | 
             
            #         # Load and process data
         | 
|  | |
| 1459 | 
             
            #         pipeline.load_data(raw_data)
         | 
| 1460 |  | 
| 1461 | 
             
            #         # Test queries with different semantic weights
         | 
| 1462 | 
            +
            #         queries = ["มีวิชาหลักหรือวิชาเลือกอะไรบ้าง"]
         | 
| 1463 |  | 
| 1464 | 
             
            #         print("=" * 80)
         | 
| 1465 |  | 
| 1466 | 
             
            #         for query in queries:
         | 
| 1467 | 
             
            #             print(f"\nQuery: {query}")
         | 
| 1468 | 
            +
            #             result = pipeline.process_query(query)
         | 
| 1469 | 
             
            #             print(f"Answer: {result['answer']}")
         | 
| 1470 | 
             
            #             print("-" * 40)
         | 
| 1471 |  | 
