Spaces:
Sleeping
Sleeping
fix rapidfuzz partial ratio
Browse files
app.py
CHANGED
|
@@ -6,7 +6,6 @@ from qdrant_client import QdrantClient
|
|
| 6 |
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
| 7 |
import os
|
| 8 |
from rapidfuzz import fuzz
|
| 9 |
-
from pythainlp.tokenize import word_tokenize
|
| 10 |
from pyairtable import Table
|
| 11 |
from pyairtable import Api
|
| 12 |
import re
|
|
@@ -73,15 +72,20 @@ def search_product(query):
|
|
| 73 |
scored = []
|
| 74 |
for r in result:
|
| 75 |
name = r.payload.get("name", "")
|
| 76 |
-
|
| 77 |
# ถ้า query สั้นเกินไป ให้ fuzzy_score = 0 เพื่อกันเพี้ยน
|
| 78 |
if len(corrected_query) >= 3 and name:
|
| 79 |
-
|
| 80 |
else:
|
| 81 |
-
|
| 82 |
-
|
| 83 |
# รวม hybrid score
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
scored.append((r, hybrid_score))
|
| 86 |
|
| 87 |
# เรียงตาม hybrid score แล้วกรองผลลัพธ์ที่ hybrid score ต่ำเกิน
|
|
@@ -96,10 +100,10 @@ def search_product(query):
|
|
| 96 |
result_summary, found = "", False
|
| 97 |
|
| 98 |
for res in result:
|
| 99 |
-
if res.score >= threshold:
|
| 100 |
found = True
|
| 101 |
name = res.payload.get("name", "ไม่ทราบชื่อสินค้า")
|
| 102 |
-
score = f"{res.score:.4f}"
|
| 103 |
img_url = res.payload.get("imageUrl", "")
|
| 104 |
price = res.payload.get("price", "ไม่ระบุ")
|
| 105 |
brand = res.payload.get("brand", "")
|
|
|
|
| 6 |
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
| 7 |
import os
|
| 8 |
from rapidfuzz import fuzz
|
|
|
|
| 9 |
from pyairtable import Table
|
| 10 |
from pyairtable import Api
|
| 11 |
import re
|
|
|
|
| 72 |
scored = []
|
| 73 |
for r in result:
|
| 74 |
name = r.payload.get("name", "")
|
| 75 |
+
|
| 76 |
# ถ้า query สั้นเกินไป ให้ fuzzy_score = 0 เพื่อกันเพี้ยน
|
| 77 |
if len(corrected_query) >= 3 and name:
|
| 78 |
+
fuzzy_score = fuzz.partial_ratio(corrected_query, name) / 100.0
|
| 79 |
else:
|
| 80 |
+
fuzzy_score = 0.0
|
|
|
|
| 81 |
# รวม hybrid score
|
| 82 |
+
if fuzzy_score < 0.5:
|
| 83 |
+
hybrid_score = r.score
|
| 84 |
+
else:
|
| 85 |
+
hybrid_score = 0.7 * r.score + 0.3 * fuzzy_score
|
| 86 |
+
r.payload["score"] = hybrid_score # เก็บลง payload ใช้เทียบ threshold ตอนแสดงผล
|
| 87 |
+
r.payload["fuzzy_score"] = fuzzy_score # เก็บไว้เผื่อ debug
|
| 88 |
+
r.payload['semantic_score'] = r.score # เก็บไว้เผื่อ debug
|
| 89 |
scored.append((r, hybrid_score))
|
| 90 |
|
| 91 |
# เรียงตาม hybrid score แล้วกรองผลลัพธ์ที่ hybrid score ต่ำเกิน
|
|
|
|
| 100 |
result_summary, found = "", False
|
| 101 |
|
| 102 |
for res in result:
|
| 103 |
+
if res.payload["score"] >= threshold:
|
| 104 |
found = True
|
| 105 |
name = res.payload.get("name", "ไม่ทราบชื่อสินค้า")
|
| 106 |
+
score = f"{res.payload['score']:.4f}"
|
| 107 |
img_url = res.payload.get("imageUrl", "")
|
| 108 |
price = res.payload.get("price", "ไม่ระบุ")
|
| 109 |
brand = res.payload.get("brand", "")
|