Commit
·
620ca5b
1
Parent(s):
ae9e0c1
updating new preprocess
Browse files
app.py
CHANGED
|
@@ -20,12 +20,29 @@ def zero_shot(doc, candidates):
|
|
| 20 |
return dict(zip(labels, scores))
|
| 21 |
|
| 22 |
# Define a function to preprocess a transaction query
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
def preprocess(transaction):
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
#create input and output objects
|
|
|
|
| 20 |
return dict(zip(labels, scores))
|
| 21 |
|
| 22 |
# Define a function to preprocess a transaction query
|
| 23 |
+
# def preprocess(transaction):
|
| 24 |
+
# pattern = r'([A-Za-z0-9\s]+)(?:/| |$)'
|
| 25 |
+
# match = re.search(pattern, transaction)
|
| 26 |
+
# if match:
|
| 27 |
+
# return match.group(1).strip()
|
| 28 |
+
# return None
|
| 29 |
+
|
| 30 |
def preprocess(transaction):
    """Normalize a raw transaction string into lowercase, letters-only text.

    Steps, in order:
      1. Lowercase the text.
      2. Drop the tokens "pos", "mps", "bil" and "onl" where they stand
         alone (i.e. are not directly preceded or followed by a letter).
      3. Delete every character that is not a-z or whitespace.
      4. Collapse whitespace runs to one space and trim both ends.

    Args:
        transaction: Raw transaction text. An empty string or ``None`` is
            accepted and yields an empty result.

    Returns:
        The cleaned string; "" when nothing is left after cleaning.
    """
    if not transaction:
        return ""

    remove_words = ["pos", "mps", "bil", "onl"]

    # Convert to lowercase
    transaction = transaction.lower()

    # Remove unwanted words. A plain str.replace() would also cut these
    # letters out of ordinary words ("deposit" -> "deit", "online" -> "ine"),
    # so only match them when the neighbours on both sides are non-letters
    # (digits, punctuation, whitespace, or the string boundary).
    # NOTE(review): a code glued directly to letters (e.g. "posstarbucks")
    # is now kept intact — confirm that is the desired trade-off.
    alternatives = "|".join(re.escape(word) for word in remove_words)
    standalone = r"(?<![a-z])(?:" + alternatives + r")(?![a-z])"
    transaction = re.sub(standalone, "", transaction)

    # Remove special characters and digits
    transaction = re.sub(r"[^a-z\s]+", "", transaction)

    # Remove extra spaces
    return re.sub(r"\s+", " ", transaction).strip()
|
| 46 |
|
| 47 |
|
| 48 |
#create input and output objects
|