"""Gradio demo: semantic product search backed by Qdrant.

The user's query is spell-corrected with SymSpell, embedded with the selected
SentenceTransformer model, and matched against the Qdrant collection that
belongs to that model. Match / no-match feedback on the latest search is
appended to a CSV log.
"""

import logging
import os
import time
from datetime import datetime

import gradio as gr
import pandas as pd
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
from sentence_transformers import SentenceTransformer
from symspellpy.symspellpy import SymSpell, Verbosity

logger = logging.getLogger(__name__)

# Display names shown in the dropdown; they are also the keys of ``models``
# and ``model_config``.
_E5_SMALL = "E5 (intfloat/multilingual-e5-small)"
_E5_LARGE_INSTRUCT = "E5 large instruct (multilingual-e5-large-instruct)"
_KALM = "Kalm (KaLM-embedding-multilingual-mini-v1)"

SYMSPELL_DICT_PATH = "symspell_dict_pythainlp.txt"
FEEDBACK_LOG_PATH = "feedback_log.csv"
MAX_EDIT_DISTANCE = 2

# Connection settings are read from the environment.
qdrant_client = QdrantClient(
    url=os.environ.get("Qdrant_url"),
    api_key=os.environ.get("Qdrant_api"),
)

# Models loaded up front, at import time.
models = {
    _E5_SMALL: SentenceTransformer('intfloat/multilingual-e5-small'),
    _E5_LARGE_INSTRUCT: SentenceTransformer("intfloat/multilingual-e5-large-instruct"),
    _KALM: SentenceTransformer('HIT-TMG/KaLM-embedding-multilingual-mini-v1'),
}

# Per-model query encoder ("func") and the Qdrant collection to search.
model_config = {
    _E5_SMALL: {
        "func": lambda query: models[_E5_SMALL].encode("query: " + query),
        "collection": "product_E5",
    },
    _E5_LARGE_INSTRUCT: {
        "func": lambda query: models[_E5_LARGE_INSTRUCT].encode(
            "Instruct: Given a product search query, retrieve relevant product listings\nQuery: " + query,
            convert_to_tensor=False,
            normalize_embeddings=True,
        ),
        "collection": "product_E5_large_instruct",
    },
    _KALM: {
        "func": lambda query: models[_KALM].encode(query, normalize_embeddings=True),
        "collection": "product_kalm",
    },
}

# Global memory to hold feedback state.
# NOTE(review): this dict is module-level, so it is shared by every caller of
# search_product/log_feedback in the process — confirm that is acceptable if
# several users use the app at once.
latest_query_result = {"query": "", "result": "", "model": ""}

symspell = SymSpell(max_dictionary_edit_distance=MAX_EDIT_DISTANCE)
# load_dictionary reports a missing file through its return value; surface it
# instead of silently running without any corrections.
if not symspell.load_dictionary(SYMSPELL_DICT_PATH, term_index=0, count_index=1):
    logger.warning(
        "SymSpell dictionary %r could not be loaded; queries will not be corrected",
        SYMSPELL_DICT_PATH,
    )


def correct_query_with_symspell(query: str) -> str:
    """Return a spell-corrected version of *query*.

    A single-word query uses ``symspell.lookup`` with ``Verbosity.CLOSEST``;
    anything else uses ``symspell.lookup_compound``. When SymSpell offers no
    suggestion (or the query is blank) the query is returned unchanged.
    """
    stripped = query.strip()
    if not stripped:
        return query

    if len(stripped.split()) == 1:
        # Look up the trimmed word so surrounding whitespace does not count
        # against the edit-distance budget.
        suggestions = symspell.lookup(
            stripped, Verbosity.CLOSEST, max_edit_distance=MAX_EDIT_DISTANCE
        )
    else:
        suggestions = symspell.lookup_compound(stripped, MAX_EDIT_DISTANCE)

    if suggestions:
        return suggestions[0].term
    return query


# 🌟 Main search function
def search_product(query, model_name):
    """Search products for *query* using the model named *model_name*.

    Returns a text report (elapsed time, the correction applied if any, and
    one line per hit with its name and score), or an error/notice message.
    On success the query, a one-line result summary and the model name are
    stored in ``latest_query_result`` for ``log_feedback``.
    """
    start_time = time.perf_counter()

    # Forget the previous search so that feedback given after a failed search
    # is never attributed to an older query.
    latest_query_result.update(query="", result="", model="")

    if model_name not in model_config:
        return "❌ ไม่พบโมเดล"

    query = (query or "").strip()
    if not query:
        # Nothing to embed or search for.
        return "⚠️ กรุณาพิมพ์คำค้นหา"

    config = model_config[model_name]

    # ✨ Fuzzy spelling correction step
    corrected_query = correct_query_with_symspell(query)
    query_embed = config["func"](corrected_query)

    try:
        points = qdrant_client.query_points(
            collection_name=config["collection"],
            query=query_embed.tolist(),
            with_payload=True,
            query_filter=Filter(
                must=[FieldCondition(key="type", match=MatchValue(value="product"))]
            ),
            limit=10,
        ).points
    except Exception as e:  # UI boundary: report the failure to the user
        return f"❌ Qdrant error: {e}"

    elapsed = time.perf_counter() - start_time

    result_lines = []
    for point in points:
        name = (point.payload or {}).get("name", "")
        result_lines.append(f"- {name} (score: {point.score:.4f})")

    parts = [f"⏱ Time: {elapsed:.2f}s\n"]
    if corrected_query != query:
        parts.append(f"🔧 แก้คำค้นจาก: `{query}` → `{corrected_query}`\n\n")
    parts.append("📦 ผลลัพธ์:\n")
    parts.extend(line + "\n" for line in result_lines)

    latest_query_result["query"] = corrected_query
    latest_query_result["result"] = " | ".join(result_lines)
    latest_query_result["model"] = model_name

    return "".join(parts)


# 📝 Logging feedback
def log_feedback(feedback):
    """Append *feedback* on the latest search to the feedback CSV.

    The row holds the current timestamp plus the model, query and result
    summary stored in ``latest_query_result``. The header row is written only
    when the file does not exist yet. Returns a status message for the UI.
    """
    if not latest_query_result["query"]:
        # No successful search yet: there is nothing to attach feedback to.
        return "⚠️ No search result to give feedback on yet"

    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "model": latest_query_result["model"],
        "query": latest_query_result["query"],
        "result": latest_query_result["result"],
        "feedback": feedback,
    }
    pd.DataFrame([log_entry]).to_csv(
        FEEDBACK_LOG_PATH,
        mode="a",
        header=not os.path.exists(FEEDBACK_LOG_PATH),
        index=False,
    )
    return f"✅ Feedback saved: {feedback}"


# 🎨 Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🔎 Product Semantic Search (Vector Search + Qdrant)")

    with gr.Row():
        model_selector = gr.Dropdown(
            choices=list(models.keys()),
            label="เลือกโมเดล",
            value=_E5_SMALL,
        )
        query_input = gr.Textbox(label="พิมพ์คำค้นหา")

    result_output = gr.Textbox(label="📋 ผลลัพธ์")

    with gr.Row():
        match_btn = gr.Button("✅ ตรง")
        not_match_btn = gr.Button("❌ ไม่ตรง")

    feedback_status = gr.Textbox(label="📬 สถานะ Feedback")

    # Events
    query_input.submit(
        search_product,
        inputs=[query_input, model_selector],
        outputs=result_output,
    )
    match_btn.click(lambda: log_feedback("match"), outputs=feedback_status)
    not_match_btn.click(lambda: log_feedback("not_match"), outputs=feedback_status)

# Run app
demo.launch(share=True)