Update calendar_rag.py
Browse files: calendar_rag.py (+32 -8)
calendar_rag.py
CHANGED
@@ -447,7 +447,35 @@ class PipelineConfig:
|
|
447 |
localization: LocalizationConfig = field(default_factory=LocalizationConfig)
|
448 |
|
449 |
def create_default_config(api_key: str) -> PipelineConfig:
|
450 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
451 |
|
452 |
class CalendarDataProcessor:
|
453 |
"""Process and structure calendar data from the new raw-data.json format"""
|
@@ -1384,7 +1412,7 @@ class AcademicCalendarRAG:
|
|
1384 |
event_type=query_info.get("event_type"),
|
1385 |
semester=query_info.get("semester"),
|
1386 |
top_k=self.config.retriever.top_k,
|
1387 |
-
weight_semantic=
|
1388 |
)
|
1389 |
|
1390 |
# Add fee information for fee-related queries
|
@@ -1422,10 +1450,6 @@ class AcademicCalendarRAG:
|
|
1422 |
|
1423 |
# # Create config with hybrid retrieval settings
|
1424 |
# config = create_default_config(openai_api_key)
|
1425 |
-
# config.localization.enable_thai_normalization = True
|
1426 |
-
# config.retriever.top_k = 5
|
1427 |
-
# config.model.temperature = 0.3
|
1428 |
-
|
1429 |
# pipeline = AcademicCalendarRAG(config)
|
1430 |
|
1431 |
# # Load and process data
|
@@ -1435,13 +1459,13 @@ class AcademicCalendarRAG:
|
|
1435 |
# pipeline.load_data(raw_data)
|
1436 |
|
1437 |
# # Test queries with different semantic weights
|
1438 |
-
# queries = ["
|
1439 |
|
1440 |
# print("=" * 80)
|
1441 |
|
1442 |
# for query in queries:
|
1443 |
# print(f"\nQuery: {query}")
|
1444 |
-
# result = pipeline.process_query(query
|
1445 |
# print(f"Answer: {result['answer']}")
|
1446 |
# print("-" * 40)
|
1447 |
|
|
|
447 |
localization: LocalizationConfig = field(default_factory=LocalizationConfig)
|
448 |
|
449 |
def create_default_config(api_key: str) -> PipelineConfig:
    """
    Assemble the default PipelineConfig, with Thai text normalization enabled.

    Each configuration section is constructed explicitly with fixed values:
    model temperature 0.3, retriever top_k 5, caching enabled under
    ./cache with a 24 hour TTL, processing batch size 32, and Thai
    normalization switched on.

    Args:
        api_key (str): OpenAI API key, passed through to ModelConfig.

    Returns:
        PipelineConfig: Configuration populated with the defaults above.
    """
    # Sections are built in the same order they are passed to PipelineConfig.
    model_settings = ModelConfig(
        openai_api_key=api_key,
        temperature=0.3,  # Lower temperature for more focused responses
    )
    retriever_settings = RetrieverConfig(
        top_k=5,  # Number of documents to retrieve
    )
    cache_settings = CacheConfig(
        enabled=True,
        cache_dir=Path("./cache"),
        ttl=86400,  # 24 hour cache
    )
    processing_settings = ProcessingConfig(
        batch_size=32,  # Default batch size for processing
    )
    localization_settings = LocalizationConfig(
        enable_thai_normalization=True,  # Enable Thai text normalization
    )

    return PipelineConfig(
        model=model_settings,
        retriever=retriever_settings,
        cache=cache_settings,
        processing=processing_settings,
        localization=localization_settings,
    )
|
479 |
|
480 |
class CalendarDataProcessor:
|
481 |
"""Process and structure calendar data from the new raw-data.json format"""
|
|
|
1412 |
event_type=query_info.get("event_type"),
|
1413 |
semester=query_info.get("semester"),
|
1414 |
top_k=self.config.retriever.top_k,
|
1415 |
+
weight_semantic= 0.3
|
1416 |
)
|
1417 |
|
1418 |
# Add fee information for fee-related queries
|
|
|
1450 |
|
1451 |
# # Create config with hybrid retrieval settings
|
1452 |
# config = create_default_config(openai_api_key)
|
|
|
|
|
|
|
|
|
1453 |
# pipeline = AcademicCalendarRAG(config)
|
1454 |
|
1455 |
# # Load and process data
|
|
|
1459 |
# pipeline.load_data(raw_data)
|
1460 |
|
1461 |
# # Test queries with different semantic weights
|
1462 |
+
# queries = ["มีวิชาหลักหรือวิชาเลือกอะไรบ้าง"]
|
1463 |
|
1464 |
# print("=" * 80)
|
1465 |
|
1466 |
# for query in queries:
|
1467 |
# print(f"\nQuery: {query}")
|
1468 |
+
# result = pipeline.process_query(query)
|
1469 |
# print(f"Answer: {result['answer']}")
|
1470 |
# print("-" * 40)
|
1471 |
|