Commit
·
620ca5b
1
Parent(s):
ae9e0c1
updating new preprocess
Browse files
app.py
CHANGED
@@ -20,12 +20,29 @@ def zero_shot(doc, candidates):
|
|
20 |
return dict(zip(labels, scores))
|
21 |
|
22 |
#define a function to preprocess transaction query
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
def preprocess(transaction):
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
|
31 |
#create input and output objects
|
|
|
20 |
return dict(zip(labels, scores))
|
21 |
|
22 |
#define a function to preprocess transaction query
|
23 |
+
# def preprocess(transaction):
|
24 |
+
# pattern = r'([A-Za-z0-9\s]+)(?:/| |$)'
|
25 |
+
# match = re.search(pattern, transaction)
|
26 |
+
# if match:
|
27 |
+
# return match.group(1).strip()
|
28 |
+
# return None
|
29 |
+
|
30 |
def preprocess(transaction):
    """Normalize a raw transaction string into lowercase, letters-only text.

    Steps, in order:
      1. Lowercase the input.
      2. Drop the codes "pos", "mps", "bil" and "onl" when they stand alone,
         i.e. are not directly adjacent to another letter.
      3. Delete every character that is not a-z or whitespace.
      4. Collapse whitespace runs into a single space and trim both ends.

    Note that step 3 deletes characters without inserting a space, so
    letters separated only by punctuation or digits are joined
    ("amazon/pay" -> "amazonpay").

    Args:
        transaction: Raw transaction text (a ``str``).

    Returns:
        The cleaned string; empty if the input held nothing but codes,
        digits, punctuation and whitespace.
    """
    remove_words = ["pos", "mps", "bil", "onl"]

    # Convert to lowercase
    transaction = transaction.lower()

    # Remove unwanted words.
    # A plain str.replace() would also cut these codes out of the middle of
    # real words ("deposit" -> "deit", "mobile" -> "moe", "online" -> "ine").
    # The letter-only lookarounds restrict removal to standalone codes while
    # still matching codes glued to digits or punctuation ("pos123", "pos/").
    alternatives = "|".join(re.escape(word) for word in remove_words)
    standalone_code = "(?<![a-z])(?:" + alternatives + ")(?![a-z])"
    transaction = re.sub(standalone_code, "", transaction)

    # Remove special characters and digits
    transaction = re.sub(r"[^a-z\s]+", "", transaction)

    # Remove extra spaces
    transaction = re.sub(r"\s+", " ", transaction).strip()
    return transaction
|
46 |
|
47 |
|
48 |
#create input and output objects
|