kevinhug committed on
Commit
8d7a1e9
·
1 Parent(s): 1587277

multi class

Files changed (3)
  1. app.py +106 -0
  2. classify.py +167 -0
  3. pii.py +9 -8
app.py CHANGED
 
@@ -4,6 +4,7 @@ from tool import rival_product
 from graphrag import reasoning
 from knowledge import graph
 from pii import derisk
+from classify import bucket

 # Define the Google Analytics script
 head = """
@@ -48,6 +49,20 @@ Links:
        btn_recommend.click(fn=rbc_product, inputs=in_verbatim, outputs=out_product)

        gr.Markdown("""
+Benefits of a Product Recommender System
+====================
+- Increased Sales & Revenue
+Personalized recommendations drive higher conversion rates and average order value.
+
+- Enhanced Customer Experience
+Tailored suggestions make the shopping journey smoother and more relevant.
+
+- Customer Retention & Loyalty
+Relevant offers encourage repeat visits and build long-term loyalty.
+
+- Inventory Optimization
+Promotes underperforming products or clears surplus stock with strategic recommendations.
+
 Marketing
 ------------
 - GraphRAG: Models customer-product relationship networks for next-best-action predictions
@@ -79,6 +94,21 @@ Risk & Audit
        btn_recommend=gr.Button("Recommend")
        btn_recommend.click(fn=rival_product, inputs=in_verbatim, outputs=out_product)

+        gr.Markdown("""
+Benefits of a Competitor Product Recommender
+=================
+- Improved Customer Retention
+Prevents drop-offs by offering similar alternatives when a preferred product is unavailable or suboptimal.
+
+- Increased Conversion Rates
+Captures potential lost sales by guiding customers toward viable substitutes.
+
+- Builds Trust and Transparency
+Demonstrates a customer-first approach by recommending the best-fit product—even if it’s not your own.
+
+- Portfolio Optimization
+Helps businesses learn which competitor features customers prefer, guiding product development and pricing.
+""")
    with gr.Tab("graphrag"):
        gr.Markdown("""
 Objective: Create a Marketing Plan based on persona.
@@ -134,6 +164,15 @@ Low APR and great customer service. I would highly recommend if you’re looking
        btn_recommend = gr.Button("Reasoning")
        btn_recommend.click(fn=reasoning, inputs=[in_verbatim, in_question], outputs=out_product)

+        gr.Markdown("""
+Benefits of a Marketing Campaign Generator
+===============
+- Accelerated Campaign Launches
+Quickly generates tailored campaigns, reducing go-to-market time from weeks to hours.
+
+- Improved Targeting & Personalization
+Uses customer data and behavior to craft messages that resonate with specific segments.
+""")

    with gr.Tab("Knowledge Graph"):
        gr.Markdown("""
@@ -154,6 +193,21 @@ Low APR and great customer service. I would highly recommend if you’re looking
        btn_recommend = gr.Button("Graph It!")
        btn_recommend.click(fn=graph, inputs=in_verbatim, outputs=out_product)

+        gr.Markdown("""
+Benefits of a Knowledge Graph
+============
+- Smarter Data Relationships
+Connects siloed data across domains to create a holistic, contextual view.
+
+- Improved Search & Discovery
+Enables semantic search—understanding meaning, not just keywords.
+
+- Enhanced Decision-Making
+Surfaces hidden patterns and relationships for better analytics and insights.
+
+- Data Reusability
+Once created, knowledge graphs can be repurposed across multiple use cases (e.g., search, recommendation, fraud detection).
+""")

    with gr.Tab("pii masking"):
        gr.Markdown("""
@@ -180,5 +234,57 @@ Low APR and great customer service. I would highly recommend if you’re looking
        )
        btn_recommend = gr.Button("Mask PII")
        btn_recommend.click(fn=derisk, inputs=in_verbatim, outputs=out_product)
+        gr.Markdown("""
+Benefits of Entity Removal
+==================
+- Data Privacy & Compliance
+Ensures sensitive information (names, emails, phone numbers, etc.) is anonymized to comply with GDPR, HIPAA, or other regulations.
+
+- Improved Data Quality
+Removes noise (e.g., irrelevant names or addresses) to make datasets cleaner and more usable for modeling or analysis.
+
+- Enhanced Focus for NLP Models
+Allows downstream tasks (like sentiment analysis or topic modeling) to focus on content rather than personal identifiers.
+""")

+
+    with gr.Tab("multi class classification"):
+        gr.Markdown("""
+Objective: Classify customer feedback into product buckets
+================================================
+""")
+        in_verbatim = gr.Textbox(label="Customer Feedback")
+        out_product = gr.Textbox(label="Classification")
+
+        gr.Examples(
+            [
+                [
+                    """
+"The online portal makes managing my mortgage payments so convenient."
+;"RBC offer great mortgage for my home with competitive rate thank you";
+"Low interest rate compared to other cards I’ve used. Highly recommend for responsible spenders.";
+"The mobile check deposit feature saves me so much time. Banking made easy!";
+"Affordable premiums with great coverage. Switched from my old provider and saved!"
+                    """
+                ]
+            ],
+            [in_verbatim]
+        )
+        btn_recommend = gr.Button("Classify")
+        btn_recommend.click(fn=bucket, inputs=in_verbatim, outputs=out_product)
+        gr.Markdown("""
+Benefits of Multi-Class Classification
+==================
+- Precision Decision-Making
+Automates complex categorization tasks (e.g., loan risk tiers, transaction types) with >90% accuracy, reducing human bias.
+
+- Operational Efficiency
+Processes 10,000+ transactions/cases per minute vs. hours manually (e.g., JP Morgan’s COiN platform reduced 360k loan doc hours to seconds).
+
+- Risk Mitigation
+Proactively flags 5+ fraud types (identity theft, money laundering) with 40% fewer false positives than rule-based systems.
+
+- Regulatory Compliance
+Auto-classifies documents for FINRA/SEC audits (e.g., Morgan Stanley uses NLP to categorize 3M+ annual communications into 50+ compliance buckets).
+""")
 demo.launch(allowed_paths=["./"])
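The new tab passes the Customer Feedback textbox straight to `bucket` from classify.py, so the input is expected to be a single string of feedback items separated by semicolons, as in the Examples entry above. A minimal way to exercise the same path outside the UI (illustration only, not part of this commit; assumes GROQ_API_KEY is set):

from classify import bucket

feedback = (
    '"The mobile check deposit feature saves me so much time. Banking made easy!";'
    '"Affordable premiums with great coverage. Switched from my old provider and saved!"'
)
print(bucket(feedback))  # prints the TagResponse as indented JSON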
classify.py ADDED
@@ -0,0 +1,167 @@
+from typing import List, Iterable, Optional
+from pydantic import BaseModel, ValidationInfo, model_validator, Field, field_validator
+import instructor
+import openai
+import asyncio
+import os
+
+
+from groq import AsyncGroq
+# Initialize with API key
+client = AsyncGroq(api_key=os.getenv("GROQ_API_KEY"))
+
+# Enable instructor patches for Groq client
+client = instructor.from_groq(client)
+"""
+# Alternative: a local model served by Ollama through the OpenAI-compatible API
+import openai
+client = instructor.from_openai(
+    openai.AsyncOpenAI(
+        base_url="http://localhost:11434/v1",
+        api_key="ollama",
+    ),
+    mode=instructor.Mode.JSON,
+)
+"""
+llm = 'llama-3.1-8b-instant' if os.getenv("GROQ_API_KEY") else "deepseek-r1"
+
+
+class Tag(BaseModel):
+    chain_of_thought: List[str] = Field(
+        default_factory=list,
+        description="the chain of thought that led to the prediction",
+        examples=[["Let's think step by step. The customer explicitly mentions donation, and there is a tag named donation, so tag the text with donation."]],
+    )
+    name: str
+    id: int = Field(..., description="id for the specific tag")
+    confidence: float = Field(
+        default=0.5,
+        ge=0,
+        le=1,
+        description="The confidence of the prediction for name and id, 0 is low, 1 is high",
+        examples=[0.5, 0.1, 0.9],
+    )
+
+    @field_validator('confidence', mode="after")
+    @classmethod
+    def high_confidence(cls, c: float):
+        """Keep only tags with confidence 0.6 or above; anything lower fails validation and triggers a retry."""
+        if c < 0.6:
+            raise ValueError(f"low confidence `{c}`")
+        return c
+
+    @model_validator(mode="after")
+    def validate_ids(self, info: ValidationInfo):
+        # Restrict predictions to the allowed tags passed in through validation_context.
+        context = info.context
+        if context:
+            tags: List[Tag] = context.get("tags")
+
+            assert self.id in {
+                tag.id for tag in tags
+            }, f"Tag ID {self.id} not found in context"
+
+            assert self.name.lower() in {
+                tag.name.lower() for tag in tags
+            }, f"Tag name {self.name} not found in context"
+        return self
+
+
+class TagWithInstructions(Tag):
+    instructions: str
+
+
+class TagRequest(BaseModel):
+    texts: List[str]
+    tags: List[TagWithInstructions]
+
+
+class TagResponse(BaseModel):
+    texts: List[str]
+    # One prediction list per input text; None if tagging failed for that text.
+    predictions: List[Optional[List[Tag]]] = Field(default_factory=list)
+
+
+# Limit concurrent LLM calls
+sem = asyncio.Semaphore(2)
+
+
+async def tag_single_request(text: str, tags: List[Tag]) -> Iterable[Tag]:
+    allowed_tags = [(tag.id, tag.name) for tag in tags]
+    allowed_tags_str = ", ".join([f"`{tag}`" for tag in allowed_tags])
+
+    async with sem:
+        try:
+            result = await client.chat.completions.create(
+                model=llm,  # alternatives: "gpt-4o-mini", "deepseek-r1", "llama3.2"
+                temperature=0.3,
+                max_retries=3,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": """You are a world-class text tagging system for customer feedback in the banking industry, used to classify banking products/services.
+"""
+                    },
+                    {"role": "user", "content": f"""Create the minimal set of Tags, following the instructions below, that are most appropriate for the following text: `{text}`
+### Instruction:
+Here are the allowed Tag(id, name); do not use any other Tag than these: {allowed_tags_str}
+Tag based on facts stated and directly mentioned in the text. Do not guess the name. Do not tag if the tag is not mentioned in the text. Do not rely on implication.
+Calculate each created Tag's confidence that the Tag fits the text.
+
+For each question, show your step-by-step thinking under 'chain_of_thought' as a list of strings, then clearly state your final answer under 'name'.
+"""},
+                ],
+                response_model=Iterable[Tag],
+                validation_context={"tags": tags},
+            )
+            return result
+        except Exception as e:
+            # On repeated validation failures or API errors, log and return None for this text.
+            print(e)
+
+
+async def tag_request(request: TagRequest) -> TagResponse:
+    predictions = await asyncio.gather(
+        *[tag_single_request(text, request.tags) for text in request.texts]
+    )
+    pred_dedup = []
+    for tags in predictions:
+        if tags is not None:
+            dedup = []
+            # filter(lambda x: x.confidence > 0.7, tags)
+            # Sort by name, then by descending confidence, and keep the
+            # highest-confidence tag for each name.
+            tags_s = sorted(tags, key=lambda x: (x.name, -x.confidence))
+            if len(tags_s) > 0:
+                dedup.append(tags_s[0])
+            for j in range(1, len(tags_s)):
+                if tags_s[j - 1].name != tags_s[j].name:
+                    dedup.append(tags_s[j])
+
+            pred_dedup.append(dedup)
+        else:
+            pred_dedup.append(None)
+
+    return TagResponse(
+        texts=request.texts,
+        predictions=pred_dedup,
+    )
+
+
+tags = [
+    TagWithInstructions(id=0, name="online", instructions="text related to online banking"),
+    TagWithInstructions(id=1, name="card", instructions="text related to credit card"),
+    TagWithInstructions(id=2, name="cars", instructions="auto finance"),
+    TagWithInstructions(id=3, name="mortgage", instructions="home mortgage"),
+    TagWithInstructions(id=4, name="insurance", instructions="insurance"),
+]
+
+
+texts = """
+"The online portal makes managing my mortgage payments so convenient."
+;"RBC offer great mortgage for my home with competitive rate thank you";
+"Low interest rate compared to other cards I’ve used. Highly recommend for responsible spenders.";
+"The mobile check deposit feature saves me so much time. Banking made easy!";
+"Affordable premiums with great coverage. Switched from my old provider and saved!"
+"""
+
+
+def bucket(texts):
+    # The UI passes one string of feedback items separated by ";".
+    texts = texts.split(";")
+    request = TagRequest(texts=texts, tags=tags)
+
+    response = asyncio.run(tag_request(request))
+    return response.model_dump_json(indent=2)
+
+
+if __name__ == "__main__":
+    print(bucket(texts))
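The two validators on Tag are what keep the model output constrained: `high_confidence` rejects tags below the 0.6 confidence floor (instructor then retries, up to max_retries), and `validate_ids` checks the predicted id/name against the allowed tags supplied through `validation_context`. A minimal sketch of that context mechanism outside the LLM call (illustration only, not part of this commit; assumes Pydantic v2 and that GROQ_API_KEY is set so the module imports cleanly):

from pydantic import ValidationError
from classify import Tag, tags

# Accepted: id/name appear in the allowed tag list and confidence clears the 0.6 floor.
ok = Tag.model_validate(
    {"name": "mortgage", "id": 3, "confidence": 0.9},
    context={"tags": tags},
)

# Rejected: confidence is below 0.6; even with a higher confidence it would still fail
# because "crypto"/id 9 are not in the allowed tag list passed as context.
try:
    Tag.model_validate(
        {"name": "crypto", "id": 9, "confidence": 0.4},
        context={"tags": tags},
    )
except ValidationError as exc:
    print(exc)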
pii.py CHANGED
@@ -1,14 +1,8 @@
 import os

 import instructor
-from groq import Groq
 from pydantic import BaseModel

-# Initialize with API key
-client = Groq(api_key=os.getenv("GROQ_API_KEY"))
-
-# Enable instructor patches for Groq client
-client = instructor.from_groq(client)
 """
 client = instructor.from_openai(
     OpenAI(
@@ -18,6 +12,13 @@ client = instructor.from_openai(
     mode=instructor.Mode.JSON,
 )
 """
+from groq import Groq
+# Initialize with API key
+client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+
+# Enable instructor patches for Groq client
+client = instructor.from_groq(client)
+
 llm = 'llama-3.1-8b-instant' if os.getenv("GROQ_API_KEY") else "llama3.2"


@@ -34,7 +35,7 @@ class PIIExtraction(BaseModel):
    private_data: list[PIIData]
    chain_of_thought: str

-    def scrub_data(self, content):
+    def sanitize(self, content):
        """
        Iterates over the private data and replaces the value with a placeholder in the form of
        <{data_type}_{i}>
@@ -84,4 +85,4 @@ if __name__ == '__main__':

    print(derisk(ESSAY))
    # print(pii_leak.model_dump_json(indent=2))
-    # print(pii_leak.scrub_data(ESSAY))
+    # print(pii_leak.sanitize(ESSAY))
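The body of `sanitize` lies outside these hunks; only its docstring is visible. A sketch consistent with that docstring, using hypothetical PIIData field names (`data_type`, `pii_value`) since the model's fields are not shown in this diff:

def sanitize(self, content):
    """
    Iterates over the private data and replaces the value with a placeholder in the form of
    <{data_type}_{i}>
    """
    # Hypothetical reconstruction; PIIData's real field names may differ.
    for i, data in enumerate(self.private_data):
        content = content.replace(data.pii_value, f"<{data.data_type}_{i}>")
    return content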