import os
import time
from datetime import datetime

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
from symspellpy.symspellpy import SymSpell, Verbosity
from pythainlp.tokenize import word_tokenize
from pyairtable import Table
from pyairtable import Api

# Qdrant connection; endpoint and API key are read from the environment.
qdrant_client = QdrantClient(
    url=os.getenv("Qdrant_url"),
    api_key=os.getenv("Qdrant_api"),
)

# Airtable table handle; credentials are read from the environment.
AIRTABLE_API_KEY = os.getenv("airtable_api")
BASE_ID = os.getenv("airtable_baseid")
TABLE_NAME = "Feedback_search"  # or rename to something clearer, e.g. 'Feedback'
api = Api(AIRTABLE_API_KEY)
table = api.table(BASE_ID, TABLE_NAME)

# Labels shared by `models` and `model_config`, so the two dicts cannot drift.
_E5_SMALL = "E5 (intfloat/multilingual-e5-small)"
_E5_LARGE_INSTRUCT = "E5 large instruct (multilingual-e5-large-instruct)"
_KALM = "Kalm (KaLM-embedding-multilingual-mini-v1)"

# Embedding models, all instantiated up front when the module is imported.
models = {
    _E5_SMALL: SentenceTransformer("intfloat/multilingual-e5-small"),
    _E5_LARGE_INSTRUCT: SentenceTransformer("intfloat/multilingual-e5-large-instruct"),
    _KALM: SentenceTransformer("HIT-TMG/KaLM-embedding-multilingual-mini-v1"),
}

# Instruction prefix prepended to every query sent to the large instruct model.
_E5_LARGE_PROMPT = (
    "Instruct: Given a product search query, retrieve relevant product listings\n"
    "Query: "
)


def _embed_e5_small(query):
    """Encode *query* with the small E5 model, prefixed with ``query: ``."""
    return models[_E5_SMALL].encode("query: " + query)


def _embed_e5_large_instruct(query):
    """Encode *query* with the large instruct E5 model behind its task prompt."""
    return models[_E5_LARGE_INSTRUCT].encode(
        _E5_LARGE_PROMPT + query,
        convert_to_tensor=False,
        normalize_embeddings=True,
    )


def _embed_kalm(query):
    """Encode *query* with the KaLM model, requesting normalized embeddings."""
    return models[_KALM].encode(query, normalize_embeddings=True)


# Per-model settings: "func" turns a query string into an embedding and
# "collection" names the Qdrant collection searched with that embedding.
model_config = {
    _E5_SMALL: {
        "func": _embed_e5_small,
        "collection": "product_E5",
    },
    _E5_LARGE_INSTRUCT: {
        "func": _embed_e5_large_instruct,
        "collection": "product_E5_large_instruct",
    },
    _KALM: {
        "func": _embed_kalm,
        "collection": "product_kalm",
    },
}

# Global memory to hold feedback state for the most recent search.
latest_query_result = {"query": "", "result": "", "model": "", "raw_query": ""}

# Spell-checker instance allowing up to two edits per dictionary entry.
symspell = SymSpell(max_dictionary_edit_distance=2)
# Populate the spell-checker from a pre-built pickle in the working directory.
symspell.load_pickle("symspell_fast.pkl")


def correct_query_with_symspell(query: str) -> str:
    """Return *query* with SymSpell spelling corrections applied.

    A query made of a single whitespace-delimited chunk is looked up as a
    whole. Any other query is segmented with PyThaiNLP's ``newmm`` engine,
    each token is corrected on its own, and the tokens are joined back with
    single spaces.

    Args:
        query: Raw search text as entered by the user.

    Returns:
        The corrected query. A blank query yields an empty string. A
        single-chunk query with no suggestion is returned unchanged.
    """
    cleaned = query.strip()
    if not cleaned:
        return cleaned

    # Single chunk: a plain lookup on the whole string works best.
    if len(cleaned.split()) == 1:
        # Look up the stripped text; surrounding whitespace would otherwise
        # be counted as edits and could push a typo past the distance limit.
        suggestions = symspell.lookup(
            cleaned, Verbosity.CLOSEST, max_edit_distance=2
        )
        return suggestions[0].term if suggestions else query

    corrected = []
    for word in word_tokenize(cleaned, engine="newmm"):
        # NOTE(review): word_tokenize appears to keep whitespace runs as
        # tokens by default (keep_whitespace=True) - confirm against the
        # installed PyThaiNLP version. Skip them so a bare space is never
        # "corrected" into a dictionary word; the join below restores the
        # separators.
        if not word.strip():
            continue

        suggestions = symspell.lookup(
            word, Verbosity.CLOSEST, max_edit_distance=2
        )
        if suggestions:
            corrected.append(suggestions[0].term)
            continue

        # No direct match: retry the raw token with compound lookup, for
        # tokens that were segmented wrongly (e.g. "ปิดปอง").
        alt_suggestions = symspell.lookup_compound(word, 2)
        corrected.append(alt_suggestions[0].term if alt_suggestions else word)

    return " ".join(corrected)
❌ ไม่พบโมเดล
" latest_query_result["raw_query"] = query corrected_query = correct_query_with_symspell(query) query_embed = model_config[model_name]["func"](corrected_query) collection_name = model_config[model_name]["collection"] try: result = qdrant_client.query_points( collection_name=collection_name, query=query_embed.tolist(), with_payload=True, query_filter=Filter(must=[FieldCondition(key="type", match=MatchValue(value="product"))]), limit=10 ).points except Exception as e: return f"❌ Qdrant error: {str(e)}
" elapsed = time.time() - start_time html_output = f"⏱ {elapsed:.2f} วินาที
" if corrected_query != query: html_output += f"🔧 แก้คำค้นจาก: {query}
→ {corrected_query}