# File size: 3,973 Bytes
# 826f9a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import re
import spacy 
from sentence_transformers import SentenceTransformer
import numpy as np
import random
from datetime import datetime, timedelta
from dateutil.parser import parse as parse_date

# A simplistic Ungrounded Answer Generator.

class UngroundedAnswerGenerator:
    """Produce a plausible but altered ("ungrounded") variant of an answer.

    The strategy is chosen from the spaCy entity labels found in the answer:
    a DATE entity selects date shifting, a MONEY or PERCENT entity selects
    numeric scaling, and anything else selects the second-best matching entry
    of ``financial_terms`` by embedding dot product.
    """

    # Either a comma-grouped number ("1,250.50") or a plain one ("20", "3.5").
    # The grouped alternative comes first so "1,000" is not read as just "1".
    _NUMBER_RE = re.compile(r"\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?")

    # Upper bound on re-draws when a scaled value rounds back to the original.
    _MAX_PERTURB_ATTEMPTS = 10

    def __init__(self):
        """Load the spaCy pipeline and sentence-embedding model, and set the term list."""
        self.nlp = spacy.load("en_core_web_sm")
        self.sim_model = SentenceTransformer('all-MiniLM-L6-v2')

        # (terms-tuple, embeddings) pair filled lazily by _term_embeddings().
        self._term_cache = None

        # Candidate distractor phrases used by _semantic_distractor().
        self.financial_terms = [
            "CommBank Credit Card",
            "Personal credit cards",
            "Business credit cards",
            "PIN",
            "ePayments Code",
            "Conditions of Use",
            "Schedule of Credit Card Particulars",
            "Banking Code of Practice",
            "NetBank",
            "CommBank app",
            "Electronic Banking Terms and Conditions",
            "Tap & Pay",
            "cash advance",
            "credit limit",
            "ATM cash withdrawals",
            "international transaction fee",
            "Mastercard",
            "Visa",
            "balance transfers",
            "regular payments",
            "additional cardholder",
            "digital wallet",
            "statements and notices",
            "closing balance",
            "minimum payment",
            "interest-free period on purchases",
            "SurePay instalment plan",
            "AutoPay",
            "fees and interest rates",
            "annual interest rates",
            "daily interest rate",
            "statement period",
            "balance transfer period",
            "unauthorised transaction",
            "card scheme refunds",
            "purchase plan",
            "card balance plan",
            "cash advance balance plan",
            "instalment setup fee",
            "purchase balance",
            "cash advances balance",
            "interest rate for the plan",
            "credit card account",
            "default under your contract",
        ]

    def generate(self, context: str, answer: str) -> str:
        """Return an altered variant of ``answer``.

        Args:
            context: Source passage; forwarded to the strategy (the current
                strategies do not read it).
            answer: The reference answer to perturb.

        Returns:
            The perturbed text. If the selected strategy hands back ``answer``
            unchanged (e.g. the date could not be parsed), the semantic
            distractor is used instead so the reference answer is not
            returned as-is.
        """
        strategy = self._select_strategy(answer)
        candidate = strategy(context, answer)
        if candidate == answer:
            # The perturbation was a no-op; fall back to a term distractor.
            candidate = self._semantic_distractor(context, answer)
        return candidate

    def _select_strategy(self, answer: str):
        """Pick the perturbation method from the entity labels spaCy finds in ``answer``."""
        labels = {ent.label_ for ent in self.nlp(answer).ents}

        # DATE takes priority over MONEY/PERCENT when both are present.
        if "DATE" in labels:
            return self._perturb_dates
        if labels & {"MONEY", "PERCENT"}:
            return self._perturb_numbers

        return self._semantic_distractor

    def _perturb_numbers(self, context: str, answer: str) -> str:
        """Scale the first number in ``answer`` and return it as ``$x.xx`` or ``x.x%``.

        Dollar amounts are scaled by a random factor in [0.8, 1.2], percentages
        by a factor in [0.5, 1.5]. ``answer`` is returned unchanged when it
        contains neither ``$`` nor ``%``.
        """
        if "$" in answer:
            return "$" + self._rescale(self._extract_number(answer), 0.8, 1.2, ".2f")
        if "%" in answer:
            return self._rescale(self._extract_number(answer), 0.5, 1.5, ".1f") + "%"
        return answer

    def _rescale(self, base: float, low: float, high: float, spec: str) -> str:
        """Format ``base`` times a random factor in [low, high] using ``spec``.

        Re-draws (up to ``_MAX_PERTURB_ATTEMPTS`` times) while the formatted
        result is identical to the formatted original, so rounding does not
        silently undo the perturbation. Best effort: for values too small for
        the format's precision the original text may still come back.
        """
        unchanged = format(base, spec)
        candidate = unchanged
        for _ in range(self._MAX_PERTURB_ATTEMPTS):
            candidate = format(base * random.uniform(low, high), spec)
            if candidate != unchanged:
                break
        return candidate

    def _perturb_dates(self, context: str, answer: str) -> str:
        """Shift the date in ``answer`` by 1-30 days in either direction.

        Returns the shifted date as ``YYYY-MM-DD``, or ``answer`` unchanged
        when the whole string cannot be parsed as a date.
        """
        # Never draw a zero offset: that would reproduce the original date.
        offset = timedelta(days=random.randint(1, 30) * random.choice((-1, 1)))
        try:
            shifted = parse_date(answer) + offset
        except (ValueError, OverflowError, TypeError):
            # Unparseable / out-of-range input: keep the best-effort contract.
            return answer
        return shifted.strftime("%Y-%m-%d")

    def _semantic_distractor(self, context: str, answer: str) -> str:
        """Return the entry of ``financial_terms`` with the second-highest score.

        The score is the dot product between the term embedding and the answer
        embedding. With a single term that term is returned; with no terms
        ``answer`` is returned unchanged.
        """
        terms = self.financial_terms
        if not terms:
            return answer
        if len(terms) == 1:
            return terms[0]

        answer_emb = self.sim_model.encode(answer)
        similarities = np.dot(self._term_embeddings(), answer_emb)
        # NOTE(review): the runner-up is chosen rather than the top hit,
        # presumably because the top hit may be the answer itself - confirm.
        return terms[int(np.argsort(similarities)[-2])]

    def _term_embeddings(self):
        """Return embeddings for ``financial_terms``, re-encoding only if the list changed."""
        key = tuple(self.financial_terms)
        cached = getattr(self, "_term_cache", None)
        if cached is None or cached[0] != key:
            cached = (key, self.sim_model.encode(list(key)))
            self._term_cache = cached
        return cached[1]

    def _extract_number(self, text: str) -> float:
        """Return the first number in ``text`` as a float.

        Thousands separators are honoured ("1,250.50" -> 1250.5). When no
        number is present a random value in [1, 1000] is returned instead.
        """
        match = self._NUMBER_RE.search(text)
        if match is None:
            return random.uniform(1, 1000)
        return float(match.group().replace(",", ""))