hf_rabbit_life_poc / app_p /utils /query_rewriter.py
SUMANA SUMANAKUL (ING)
first commit
30adccc
raw
history blame
2.7 kB
import json
import os
import re

import google.generativeai as genai
# --- Create an LLM dedicated to query rewriting ---
# Runs at import time. Any failure is reported and leaves `rewriter_model`
# set to None instead of aborting the import.
try:
    genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
    rewriter_model = genai.GenerativeModel(
        "gemini-2.0-flash",
        generation_config=dict(temperature=0.0),
    )
    print("✅ Initialized Rewriter LLM.")
except Exception as init_error:
    rewriter_model = None
    print(f"‼️ ERROR initializing Rewriter LLM: {init_error}")
# Prompt sent to the rewriter model. It carries two placeholders:
#   {chat_history} - the recent conversation, serialized as JSON
#   {question}     - the user's latest input
# NOTE(review): Example 2 below contains literal JSON braces that are NOT
# doubled, so this text is not a valid `str.format` template as written:
# `str.format` treats `{"role": ...}` as a replacement field and raises
# KeyError. Fill the two placeholders by literal substitution, or escape the
# example's braces as `{{` / `}}` before using `str.format`.
REWRITE_PROMPT_TEMPLATE = """
You are a highly precise query rewriting expert. Your one and only goal is to transform a follow-up or keyword-based question into a clear, standalone Thai question for a database search.
**Core Logic:**
1. Analyze the `Chat History` to understand the context.
2. Analyze the user's `Latest Input`.
3. Combine the context and the latest input to construct a new, complete, and natural-sounding question in Thai.
**Crucial Rules:**
- The output MUST be a single, complete Thai question.
- Do not add explanations.
- If the `Latest Input` is already a complete question, just return it after minor corrections if needed.
**Example 1 (Keywords):**
- History: []
- Latest Input: "worry free cancer"
- Your Standalone Output: "ประกัน Worry Free Cancer คุ้มครองอะไรบ้าง"
**Example 2 (Follow-up):**
- History: [{"role": "user", "parts": [{"text":"Sabai Jai คืออะไร"}]}, {"role": "model", "parts": [{"text":"Sabai Jai เป็นประกัน..."}]}]
- Latest Input: "แล้วลดหย่อนภาษีได้ไหม"
- Your Standalone Output: "ประกัน Sabai Jai สามารถนำไปลดหย่อนภาษีได้หรือไม่"
---
**Chat History:**
{chat_history}
**Latest Input:** "{question}"
**Your Standalone Output:**
"""
def rewrite_query(question: str, chat_history: list) -> str:
    """Rewrite a user's query into a standalone question using chat history.

    The most recent history entries and the latest input are substituted into
    ``REWRITE_PROMPT_TEMPLATE`` and sent to the rewriter model. The step is
    best-effort: if the model is unavailable, the prompt cannot be built, the
    model call fails, or the model returns no text, the original ``question``
    is returned unchanged.

    Args:
        question: The user's latest input.
        chat_history: Earlier conversation entries. Must be JSON-serializable;
            ``None`` or an empty list is treated as "no history".

    Returns:
        The rewritten standalone question, or ``question`` on any failure.
    """
    if not rewriter_model:
        print("-> Rewriter LLM not available. Returning original query.")
        return question

    print(f"🔄 Rewriting query: '{question}'")

    try:
        # Use only the last 4 entries (i.e. the 2 most recent rounds).
        # ensure_ascii=False keeps Thai text readable in the prompt instead of
        # turning it into \uXXXX escape sequences.
        history_json = json.dumps((chat_history or [])[-4:], ensure_ascii=False)

        # The template contains literal JSON braces in its examples, so
        # str.format() would raise KeyError on it. Substitute the two known
        # placeholders in a single pass instead; one pass also guarantees that
        # placeholder-like text inside the history or the question is never
        # expanded a second time.
        substitutions = {"chat_history": history_json, "question": question}
        prompt = re.sub(
            r"\{(chat_history|question)\}",
            lambda match: substitutions[match.group(1)],
            REWRITE_PROMPT_TEMPLATE,
        )

        response = rewriter_model.generate_content(prompt)
        rewritten = response.text.strip()
    except Exception as e:
        # Deliberate catch-all: rewriting must never break the caller's flow.
        print(f"‼️ ERROR during query rewrite: {e}. Returning original query.")
        return question

    if not rewritten:
        # An empty rewrite would produce an empty search query downstream.
        print("-> Rewriter returned empty text. Returning original query.")
        return question

    print(f"✅ Rewritten query: '{rewritten}'")
    return rewritten