multi class
- app.py +106 -0
- classify.py +167 -0
- pii.py +9 -8
app.py
CHANGED
@@ -4,6 +4,7 @@ from tool import rival_product
 from graphrag import reasoning
 from knowledge import graph
 from pii import derisk
+from classify import bucket

 # Define the Google Analytics script
 head = """
@@ -48,6 +49,20 @@ Links:
        btn_recommend.click(fn=rbc_product, inputs=in_verbatim, outputs=out_product)

        gr.Markdown("""
+Benefits of a Product Recommender System
+====================
+- Increased Sales & Revenue
+Personalized recommendations drive higher conversion rates and average order value.
+
+- Enhanced Customer Experience
+Tailored suggestions make the shopping journey smoother and more relevant.
+
+- Customer Retention & Loyalty
+Relevant offers encourage repeat visits and build long-term loyalty.
+
+- Inventory Optimization
+Promotes underperforming products or clears surplus stock with strategic recommendations.
+
 Marketing
 ------------
 - GraphRAG: Models customer-product relationship networks for next-best-action predictions
@@ -79,6 +94,21 @@ Risk & Audit
        btn_recommend=gr.Button("Recommend")
        btn_recommend.click(fn=rival_product, inputs=in_verbatim, outputs=out_product)

+        gr.Markdown("""
+Benefits of a Competitor Product Recommender
+=================
+- Improved Customer Retention
+Prevents drop-offs by offering similar alternatives when a preferred product is unavailable or suboptimal.
+
+- Increased Conversion Rates
+Captures potential lost sales by guiding customers toward viable substitutes.
+
+- Builds Trust and Transparency
+Demonstrates a customer-first approach by recommending the best-fit product, even if it is not your own.
+
+- Portfolio Optimization
+Helps businesses learn which competitor features customers prefer, guiding product development and pricing.
+""")
    with gr.Tab("graphrag"):
        gr.Markdown("""
 Objective: Create a Marketing Plan based on persona.
@@ -134,6 +164,15 @@ Low APR and great customer service. I would highly recommend if you’re looking
        btn_recommend = gr.Button("Reasoning")
        btn_recommend.click(fn=reasoning, inputs=[in_verbatim, in_question], outputs=out_product)

+        gr.Markdown("""
+Benefits of a Marketing Campaign Generator
+===============
+- Accelerated Campaign Launches
+Quickly generates tailored campaigns, reducing go-to-market time from weeks to hours.
+
+- Improved Targeting & Personalization
+Uses customer data and behavior to craft messages that resonate with specific segments.
+""")

    with gr.Tab("Knowledge Graph"):
        gr.Markdown("""
@@ -154,6 +193,21 @@ Low APR and great customer service. I would highly recommend if you’re looking
        btn_recommend = gr.Button("Graph It!")
        btn_recommend.click(fn=graph, inputs=in_verbatim, outputs=out_product)

+        gr.Markdown("""
+Benefits of a Knowledge Graph
+============
+- Smarter Data Relationships
+Connects siloed data across domains to create a holistic, contextual view.
+
+- Improved Search & Discovery
+Enables semantic search that understands meaning, not just keywords.
+
+- Enhanced Decision-Making
+Surfaces hidden patterns and relationships for better analytics and insights.
+
+- Data Reusability
+Once created, knowledge graphs can be repurposed across multiple use cases (e.g., search, recommendation, fraud detection).
+""")

    with gr.Tab("pii masking"):
        gr.Markdown("""
@@ -180,5 +234,57 @@ Low APR and great customer service. I would highly recommend if you’re looking
        )
        btn_recommend = gr.Button("Mask PII")
        btn_recommend.click(fn=derisk, inputs=in_verbatim, outputs=out_product)
+        gr.Markdown("""
+Benefits of Entity Removal
+==================
+- Data Privacy & Compliance
+Ensures sensitive information (names, emails, phone numbers, etc.) is anonymized to comply with GDPR, HIPAA, or other regulations.
+
+- Improved Data Quality
+Removes noise (e.g., irrelevant names or addresses) to make datasets cleaner and more usable for modeling or analysis.
+
+- Enhanced Focus for NLP Models
+Allows downstream tasks (like sentiment analysis or topic modeling) to focus on content rather than personal identifiers.
+""")

+
+    with gr.Tab("multi class classification"):
+        gr.Markdown("""
+Objective: Classify customer feedback into product buckets
+================================================
+""")
+        in_verbatim = gr.Textbox(label="Customer Feedback")
+        out_product = gr.Textbox(label="Classification")
+
+        gr.Examples(
+            [
+                [
+                    """
+"The online portal makes managing my mortgage payments so convenient."
+;"RBC offer great mortgage for my home with competitive rate thank you";
+"Low interest rate compared to other cards I’ve used. Highly recommend for responsible spenders.";
+"The mobile check deposit feature saves me so much time. Banking made easy!";
+"Affordable premiums with great coverage. Switched from my old provider and saved!"
+"""
+                ]
+            ],
+            [in_verbatim]
+        )
+        btn_recommend = gr.Button("Classify")
+        btn_recommend.click(fn=bucket, inputs=in_verbatim, outputs=out_product)
+        gr.Markdown("""
+Benefits of Multi Class Classification
+==================
+- Precision Decision-Making
+Automates complex categorization tasks (e.g., loan risk tiers, transaction types) with >90% accuracy, reducing human bias.
+
+- Operational Efficiency
+Processes 10,000+ transactions/cases per minute vs. hours manually (e.g., JP Morgan’s COiN platform reduced 360k loan doc hours to seconds).
+
+- Risk Mitigation
+Proactively flags 5+ fraud types (identity theft, money laundering) with 40% fewer false positives than rule-based systems.
+
+- Regulatory Compliance
+Auto-classifies documents for FINRA/SEC audits (e.g., Morgan Stanley uses NLP to categorize 3M+ annual communications into 50+ compliance buckets).
+""")
 demo.launch(allowed_paths=["./"])
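Note on the new tab: the textbox content is passed straight to classify.bucket, which splits its input on ";", so the example string above is treated as five separate feedback items. A minimal sketch of calling the handler outside Gradio (two of the example strings, assuming GROQ_API_KEY is set; illustrative only):

# Sketch: exercise the new classify.bucket handler directly, outside Gradio.
# bucket() splits its input on ";" and returns a TagResponse as indented JSON,
# with one prediction list (or None) per feedback item.
from classify import bucket

feedback = (
    '"The online portal makes managing my mortgage payments so convenient."'
    ';"RBC offer great mortgage for my home with competitive rate thank you"'
)
print(bucket(feedback))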
classify.py
ADDED
@@ -0,0 +1,167 @@
from typing import List, Iterable, Optional
from pydantic import BaseModel, ValidationInfo, model_validator, Field, field_validator
import instructor
import asyncio
import os


from groq import AsyncGroq

# Initialize the Groq client with the API key
client = AsyncGroq(api_key=os.getenv("GROQ_API_KEY"))

# Enable instructor patches for the Groq client
client = instructor.from_groq(client)
"""
# Alternative: a local Ollama endpoint through the OpenAI-compatible API
import openai
client = instructor.from_openai(
    openai.AsyncOpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama",
    ),
    mode=instructor.Mode.JSON,
)
"""
llm = 'llama-3.1-8b-instant' if os.getenv("GROQ_API_KEY") else "deepseek-r1"


class Tag(BaseModel):
    chain_of_thought: List[str] = Field(
        default_factory=list,
        description="the chain of thought that led to the prediction",
        examples=["Let's think step by step. The customer explicitly mentions a donation, and there is a tag named donation, so tag the text with donation"],
    )
    name: str
    id: int = Field(..., description="id for the specific tag")
    confidence: float = Field(
        default=0.5,
        ge=0,
        le=1,
        description="The confidence of the prediction for name and id, 0 is low, 1 is high",
        examples=[0.5, 0.1, 0.9],
    )

    @field_validator('confidence', mode="after")
    @classmethod
    def high_confidence(cls, c: float):
        """Keep only tags with confidence 0.6 or above."""
        if c < 0.6:
            raise ValueError(f"low confidence `{c}`")
        return c

    @model_validator(mode="after")
    def validate_ids(self, info: ValidationInfo):
        # Restrict predictions to the tag ids/names supplied via validation_context
        context = info.context
        if context:
            tags: List[Tag] = context.get("tags")

            assert self.id in {
                tag.id for tag in tags
            }, f"Tag ID {self.id} not found in context"

            assert self.name.lower() in {
                tag.name.lower() for tag in tags
            }, f"Tag name {self.name} not found in context"
        return self


class TagWithInstructions(Tag):
    instructions: str


class TagRequest(BaseModel):
    texts: List[str]
    tags: List[TagWithInstructions]


class TagResponse(BaseModel):
    texts: List[str]
    predictions: List[Optional[List[Tag]]] = Field(default_factory=list)


# Limit concurrent LLM calls
sem = asyncio.Semaphore(2)


async def tag_single_request(text: str, tags: List[Tag]) -> Optional[Iterable[Tag]]:
    allowed_tags = [(tag.id, tag.name) for tag in tags]
    allowed_tags_str = ", ".join([f"`{tag}`" for tag in allowed_tags])

    async with sem:
        try:
            result = await client.chat.completions.create(
                model=llm,  # "gpt-4o-mini", "deepseek-r1", "llama3.2"
                temperature=0.3,
                max_retries=3,
                messages=[
                    {
                        "role": "system",
                        "content": """You are a world-class text tagging system for customer feedback in the banking industry, classifying banking products/services.""",
                    },
                    {"role": "user", "content": f"""Create the minimal set of Tags, following the instructions, that are most appropriate for the following text: `{text}`
### Instruction:
Here are the allowed Tag(id, name); do not use any other Tag than these: {allowed_tags_str}
Tag names only based on facts stated and directly mentioned in the text. Do not guess the name, do not tag anything that is not mentioned in the text, and do not use implication.
Estimate each newly created Tag's confidence that the Tag fits the text.

For each text, show your step-by-step thinking under 'chain_of_thought' as a list of strings, then clearly state your final answer under 'name'.
"""},
                ],
                response_model=Iterable[Tag],
                validation_context={"tags": tags},
            )
            return result
        except Exception as e:
            print(e)


async def tag_request(request: TagRequest) -> TagResponse:
    predictions = await asyncio.gather(
        *[tag_single_request(text, request.tags) for text in request.texts]
    )
    # Deduplicate: keep one tag per name (the highest-confidence one)
    pred_dedup = []
    for tags_pred in predictions:
        if tags_pred is not None:
            dedup = []
            tags_s = sorted(tags_pred, key=lambda x: (x.name, -x.confidence))
            if len(tags_s) > 0:
                dedup.append(tags_s[0])
                for j in range(1, len(tags_s)):
                    if tags_s[j - 1].name != tags_s[j].name:
                        dedup.append(tags_s[j])

            pred_dedup.append(dedup)
        else:
            pred_dedup.append(None)

    return TagResponse(
        texts=request.texts,
        predictions=pred_dedup,
    )


tags = [
    TagWithInstructions(id=0, name="online", instructions="text related to online banking"),
    TagWithInstructions(id=1, name="card", instructions="text related to credit card"),
    TagWithInstructions(id=2, name="cars", instructions="auto finance"),
    TagWithInstructions(id=3, name="mortgage", instructions="home mortgage"),
    TagWithInstructions(id=4, name="insurance", instructions="insurance"),
]


texts = """
"The online portal makes managing my mortgage payments so convenient."
;"RBC offer great mortgage for my home with competitive rate thank you";
"Low interest rate compared to other cards I’ve used. Highly recommend for responsible spenders.";
"The mobile check deposit feature saves me so much time. Banking made easy!";
"Affordable premiums with great coverage. Switched from my old provider and saved!"
"""


def bucket(texts):
    # Split the semicolon-separated feedback string into individual texts
    texts = texts.split(";")
    request = TagRequest(texts=texts, tags=tags)

    response = asyncio.run(tag_request(request))
    return response.model_dump_json(indent=2)


if __name__ == "__main__":
    print(bucket(texts))
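Two details of classify.py worth noting: the Tag validators run inside instructor, so with max_retries=3 a response that fails validation (confidence below 0.6, or an id/name outside the allowed list supplied via validation_context) is sent back to the model for another attempt rather than kept. The same checks can be exercised directly with pydantic; a small sketch with illustrative values:

# Sketch: run the Tag validators by hand with the same validation context
# that tag_single_request passes through instructor.
from classify import Tag, tags

ok = Tag.model_validate(
    {"id": 3, "name": "mortgage", "confidence": 0.9},
    context={"tags": tags},
)  # accepted: id/name are in the allowed list and confidence >= 0.6

Tag.model_validate(
    {"id": 9, "name": "crypto", "confidence": 0.9},
    context={"tags": tags},
)  # raises ValidationError: Tag ID 9 / name "crypto" not found in context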
pii.py
CHANGED
@@ -1,14 +1,8 @@
 import os

 import instructor
-from groq import Groq
 from pydantic import BaseModel

-# Initialize with API key
-client = Groq(api_key=os.getenv("GROQ_API_KEY"))
-
-# Enable instructor patches for Groq client
-client = instructor.from_groq(client)
 """
 client = instructor.from_openai(
     OpenAI(
@@ -18,6 +12,13 @@ client = instructor.from_openai(
     mode=instructor.Mode.JSON,
 )
 """
+from groq import Groq
+# Initialize with API key
+client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+
+# Enable instructor patches for Groq client
+client = instructor.from_groq(client)
+
 llm = 'llama-3.1-8b-instant' if os.getenv("GROQ_API_KEY") else "llama3.2"


@@ -34,7 +35,7 @@ class PIIExtraction(BaseModel):
     private_data: list[PIIData]
     chain_of_thought: str

-    def
+    def sanitize(self, content):
         """
         Iterates over the private data and replaces the value with a placeholder in the form of
         <{data_type}_{i}>
@@ -84,4 +85,4 @@ if __name__ == '__main__':

     print(derisk(ESSAY))
     # print(pii_leak.model_dump_json(indent=2))
-    # print(pii_leak.
+    # print(pii_leak.sanitize(ESSAY))
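The body of sanitize is outside this diff; a minimal sketch of the behaviour the docstring describes might look like the following, assuming PIIData exposes data_type and pii_value fields (those names are assumptions, not shown in the hunks):

# Hypothetical sketch only; the real PIIData field names are not visible in this diff.
def sanitize(self, content: str) -> str:
    for i, data in enumerate(self.private_data):
        # Replace each detected value with a <{data_type}_{i}> placeholder,
        # e.g. "john@example.com" -> "<email_0>"
        content = content.replace(data.pii_value, f"<{data.data_type}_{i}>")
    return content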