JirasakJo committed on
Commit
d3040a9
·
verified ·
1 Parent(s): 17fb98b

Update calendar_rag.py

Browse files
Files changed (1) hide show
  1. calendar_rag.py +32 -8
calendar_rag.py CHANGED
@@ -447,7 +447,35 @@ class PipelineConfig:
447
  localization: LocalizationConfig = field(default_factory=LocalizationConfig)
448
 
449
  def create_default_config(api_key: str) -> PipelineConfig:
450
- return PipelineConfig(model=ModelConfig(openai_api_key=api_key))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
 
452
  class CalendarDataProcessor:
453
  """Process and structure calendar data from the new raw-data.json format"""
@@ -1384,7 +1412,7 @@ class AcademicCalendarRAG:
1384
  event_type=query_info.get("event_type"),
1385
  semester=query_info.get("semester"),
1386
  top_k=self.config.retriever.top_k,
1387
- weight_semantic=weight_semantic
1388
  )
1389
 
1390
  # Add fee information for fee-related queries
@@ -1422,10 +1450,6 @@ class AcademicCalendarRAG:
1422
 
1423
  # # Create config with hybrid retrieval settings
1424
  # config = create_default_config(openai_api_key)
1425
- # config.localization.enable_thai_normalization = True
1426
- # config.retriever.top_k = 5
1427
- # config.model.temperature = 0.3
1428
-
1429
  # pipeline = AcademicCalendarRAG(config)
1430
 
1431
  # # Load and process data
@@ -1435,13 +1459,13 @@ class AcademicCalendarRAG:
1435
  # pipeline.load_data(raw_data)
1436
 
1437
  # # Test queries with different semantic weights
1438
- # queries = ["เปิดเทอมวันเเรกวันไหน"]
1439
 
1440
  # print("=" * 80)
1441
 
1442
  # for query in queries:
1443
  # print(f"\nQuery: {query}")
1444
- # result = pipeline.process_query(query, weight_semantic=0.3)
1445
  # print(f"Answer: {result['answer']}")
1446
  # print("-" * 40)
1447
 
 
447
  localization: LocalizationConfig = field(default_factory=LocalizationConfig)
448
 
449
  def create_default_config(api_key: str) -> PipelineConfig:
450
+ """
451
+ Create a default pipeline configuration with optimized settings for Thai language processing.
452
+
453
+ Args:
454
+ api_key (str): OpenAI API key
455
+
456
+ Returns:
457
+ PipelineConfig: Configured pipeline settings
458
+ """
459
+ return PipelineConfig(
460
+ model=ModelConfig(
461
+ openai_api_key=api_key,
462
+ temperature=0.3 # Lower temperature for more focused responses
463
+ ),
464
+ retriever=RetrieverConfig(
465
+ top_k=5 # Optimal number of documents to retrieve
466
+ ),
467
+ cache=CacheConfig(
468
+ enabled=True,
469
+ cache_dir=Path("./cache"),
470
+ ttl=86400 # 24 hour cache
471
+ ),
472
+ processing=ProcessingConfig(
473
+ batch_size=32 # Default batch size for processing
474
+ ),
475
+ localization=LocalizationConfig(
476
+ enable_thai_normalization=True # Enable Thai text normalization
477
+ )
478
+ )
479
 
480
  class CalendarDataProcessor:
481
  """Process and structure calendar data from the new raw-data.json format"""
 
1412
  event_type=query_info.get("event_type"),
1413
  semester=query_info.get("semester"),
1414
  top_k=self.config.retriever.top_k,
1415
+ weight_semantic= 0.3
1416
  )
1417
 
1418
  # Add fee information for fee-related queries
 
1450
 
1451
  # # Create config with hybrid retrieval settings
1452
  # config = create_default_config(openai_api_key)
 
 
 
 
1453
  # pipeline = AcademicCalendarRAG(config)
1454
 
1455
  # # Load and process data
 
1459
  # pipeline.load_data(raw_data)
1460
 
1461
  # # Test queries with different semantic weights
1462
+ # queries = ["มีวิชาหลักหรือวิชาเลือกอะไรบ้าง"]
1463
 
1464
  # print("=" * 80)
1465
 
1466
  # for query in queries:
1467
  # print(f"\nQuery: {query}")
1468
+ # result = pipeline.process_query(query)
1469
  # print(f"Answer: {result['answer']}")
1470
  # print("-" * 40)
1471