analytic
- app.py +64 -28
- knowledge.py +123 -63
- pii.py +87 -0
app.py
CHANGED
@@ -3,39 +3,36 @@ from rag import rbc_product
 from tool import rival_product
 from graphrag import reasoning
 from knowledge import graph
-
-
-
-
-
+from pii import derisk
+
+# Define the Google Analytics script
+head = """
+<!-- Google tag (gtag.js) -->
+<script async src="https://www.googletagmanager.com/gtag/js?id=G-SRX9LDVBCW"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments);}
+  gtag('js', new Date());
+
+  gtag('config', 'G-SRX9LDVBCW');
+</script>
+"""
+
+with gr.Blocks(head=head) as demo:
     with gr.Tab("RAG"):
         gr.Markdown("""
-Marketing
-------------
-GraphRAG: Models customer-product relationship networks for next-best-action predictions
-
-DSPy: Optimizes cross-sell/upsell prompt variations through A/B testing
-
-Risk & Audit
+Links:
 ------------
-
-
-Tool Use: Integrates fraud detection APIs, anomaly scoring models, and regulatory compliance checkers
-
-DSPy: Optimizes fraud explanation prompts for regulatory reporting
-
-Other Use Case
-------------
-https://huggingface.co/spaces/kevinhug/clientX
-https://kevinwkc.github.io/davinci/
+- https://huggingface.co/spaces/kevinhug/clientX
+- https://kevinwkc.github.io/davinci/
 """)


         gr.Markdown("""
 Objective: Recommend RBC product based on persona.
 ================================================
-Retrieval: Public RBC Product Data
-Recommend: RBC Product
+- Retrieval: Public RBC Product Data
+- Recommend: RBC Product
 """)
         in_verbatim = gr.Textbox(label="Verbatim")
         out_product = gr.Textbox(label="Product")
@@ -50,13 +47,25 @@ https://kevinwkc.github.io/davinci/
         btn_recommend=gr.Button("Recommend")
         btn_recommend.click(fn=rbc_product, inputs=in_verbatim, outputs=out_product)

+        gr.Markdown("""
+Marketing
+------------
+- GraphRAG: Models customer-product relationship networks for next-best-action predictions
+- DSPy: Optimizes cross-sell/upsell prompt variations through A/B testing
+
+Risk & Audit
+------------
+- GraphRAG: Maps transactional relationships into dynamic knowledge graphs to detect multi-layered fraud patterns
+- Tool Use: Integrates fraud detection APIs, anomaly scoring models, and regulatory compliance checkers
+- DSPy: Optimizes fraud explanation prompts for regulatory reporting
+""")
+
     with gr.Tab("Tool Use"):
         gr.Markdown("""
 Objective: Recommend financial product based on persona for competitive analysis, product feature discovery
 ================================================
-Retrieval: Public Product Data using Tavily Search
-
-Recommend: Competition Product
+- Retrieval: Public Product Data using Tavily Search
+- Recommend: Competition Product
 """)
         in_verbatim = gr.Textbox(label="Verbatim")
         out_product = gr.Textbox(label="Product")
@@ -74,7 +83,7 @@ https://kevinwkc.github.io/davinci/
         gr.Markdown("""
 Objective: Create a Marketing Plan based on persona.
 =======================
-Reasoning from context, answering the question
+- Reasoning from context, answering the question
 """)

         marketing = """
@@ -145,4 +154,31 @@ Low APR and great customer service. I would highly recommend if you’re looking
         btn_recommend = gr.Button("Graph It!")
         btn_recommend.click(fn=graph, inputs=in_verbatim, outputs=out_product)

+
+    with gr.Tab("pii masking"):
+        gr.Markdown("""
+Objective: PII data removal
+================================================
+""")
+        in_verbatim = gr.Textbox(label="Personal Info")
+        out_product = gr.Textbox(label="PII")
+
+        gr.Examples(
+            [
+                [
+                    """
+He Hua (Hua Hua) Director
+
++86-28-83505513
+
+Alternative Address Format:
+Xiongmao Ave West Section, Jinniu District (listed in some records as 610016 postcode)
+"""
+                ]
+            ],
+            [in_verbatim]
+        )
+        btn_recommend = gr.Button("Mask PII")
+        btn_recommend.click(fn=derisk, inputs=in_verbatim, outputs=out_product)
+
 demo.launch(allowed_paths=["./"])
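For reference, a minimal runnable sketch of the Gradio wiring pattern the updated app.py relies on: gr.Blocks(head=...) injects an HTML snippet into the page head, gr.Examples pre-fills the input box, and Button.click binds a callback to the input/output textboxes. The mask() function and the head content below are hypothetical stand-ins for pii.derisk and the real gtag.js snippet.

import gradio as gr

# Placeholder for the gtag.js snippet that app.py injects via gr.Blocks(head=...)
head = "<script>console.log('analytics placeholder');</script>"

def mask(text: str) -> str:
    # Hypothetical stand-in for pii.derisk, which calls the LLM to extract and mask PII
    return text.replace("+86-28-83505513", "<phone_0>")

with gr.Blocks(head=head) as demo:
    with gr.Tab("pii masking"):
        inp = gr.Textbox(label="Personal Info")
        out = gr.Textbox(label="PII")
        gr.Examples([["He Hua (Hua Hua) Director +86-28-83505513"]], [inp])  # pre-filled example input
        gr.Button("Mask PII").click(fn=mask, inputs=inp, outputs=out)

if __name__ == "__main__":
    demo.launch()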
knowledge.py
CHANGED
@@ -1,33 +1,8 @@
+from typing import List

 import instructor
-
-from pydantic import BaseModel, Field
-from typing import List
 from graphviz import Digraph
-
-class Node(BaseModel, frozen=True):
-    """
-    Node representing concept in the subject domain
-    """
-    id: int
-    label: str = Field(..., description = "description of the concept in the subject domain")
-    color: str
-
-class Edge(BaseModel, frozen=True):
-    """
-    Edge representing relationship between concepts in the subject domain
-    """
-    source: int = Field(..., description = "source representing concept in the subject domain")
-    target: int = Field(..., description = "target representing concept in the subject domain")
-    label: str = Field(..., description = "description representing relationship between concepts in the subject domain")
-    color: str = "black"
-
-class KnowledgeGraph(BaseModel):
-    """
-    graph representation of concepts in the subject domain
-    """
-    nodes: List[Node] = Field(..., default_factory=list)
-    edges: List[Edge] = Field(..., default_factory=list)
+from pydantic import BaseModel, Field

 from groq import Groq
 import os
@@ -37,7 +12,6 @@ client = Groq(api_key=os.getenv("GROQ_API_KEY"))

 # Enable instructor patches for Groq client
 client = instructor.from_groq(client)
-llm='llama-3.1-8b-instant' #"llama3.2", #
 """
 from openai import OpenAI
 client = instructor.from_openai(
@@ -48,46 +22,132 @@ client = instructor.from_openai(
     mode=instructor.Mode.JSON,
 )
 """
+llm = 'llama-3.1-8b-instant' if os.getenv("GROQ_API_KEY") else "llama3.2"
+
+class Node(BaseModel, frozen=True):
+    """
+    Node representing concept in the subject domain
+    """
+    id: int = Field(...,
+                    description="unique id of the concept in the subject domain, used for deduplication, design a scheme allows multiple concept")
+    label: str = Field(..., description="description of the concept in the subject domain")
+    color: str = "orange"
+
+
+class Edge(BaseModel, frozen=True):
+    """
+    Edge representing relationship between concepts in the subject domain, source depends on target
+    """
+    source: int = Field(..., description="source representing concept in the subject domain")
+    target: int = Field(..., description="target representing concept in the subject domain")
+    label: str = Field(..., description="description representing relationship between concepts in the subject domain")
+    color: str = "black"
+
+
+from typing import Optional
+
+
+class KnowledgeGraph(BaseModel):
+    """
+    KnowledgeGraph is graph representation of concepts in the subject domain
+    """
+    nodes: Optional[List[Node]] = Field(..., default_factory=list)
+    edges: Optional[List[Edge]] = Field(..., default_factory=list)
+
+    def update(self, other: "KnowledgeGraph") -> "KnowledgeGraph":
+        """Updates the current graph with the other graph, deduplicating nodes and edges."""
+        return KnowledgeGraph(
+            nodes=list(set(self.nodes + other.nodes)),
+            edges=list(set(self.edges + other.edges)),
+        )
+
+    def draw(self, prefix: str = "knowledge_graph"):
+        dot = Digraph(comment="Knowledge Graph")
+
+        for node in self.nodes:
+            dot.node(str(node.id), node.label, color=node.color)
+
+        for edge in self.edges:
+            dot.edge(
+                str(edge.source), str(edge.target), label=edge.label, color=edge.color
+            )
+        dot.render(prefix, format="png", view=True)
+
+
+from typing import Iterable
+from textwrap import dedent
+
+
 def generate_graph(q, input) -> KnowledgeGraph:
-
+    return client.chat.completions.create(
         model=llm,
         max_retries=5,
         messages=[
-
-
-
-
+            {
+                "role": "user",
+                "content": dedent(f"""Help me understand the following by describing it as a detailed knowledge graph:
+                ### Question: {q}
+                ### Context: {input}
+                ### Instruction:
+                Generate at least 5 concepts
+                Generate at least 3 relationships
+                ### Output Format:
+                Node with id, label for description of the concept
+                Edge with source's id, target's id, label for description of the relationship between source concept and target concept
+                """),
+
+            }
         ],
-        response_model=KnowledgeGraph
-
+        response_model=KnowledgeGraph)
+
+
+class Subissue(BaseModel):
+    subissue_title: str
+    point: List[str] = Field(default_factory=list, description="Specific aspect or component of the subissue")
+
+
+def expandIssue(input) -> Iterable[Subissue]:
+    response = client.chat.completions.create(
+        model=llm,
+        max_retries=3,
+        response_model=Iterable[Subissue],
+        temperature=0.1,
+        messages=[
+
+            {
+                "role": "user",
+                "content": dedent(f"""
+                As a McKinsey Consultant, perform MECE decomposition of the question.
+                ### Requirements
+                1. Return 3 subissues minimum
+                2. Each sub-issue has 3 bullet points, with each new point beginning with a *
+                3. Use EXACT format:
+
+                - [Sub-issue 1.1 title]
+                  * [point 1]
+                  * [point 2]
+                  * [point 3]
+                - [Sub-issue 1.2 title]
+                  * [point 1]
+                  * [point 2]
+                  * [point 3]
+                - [Sub-issue 1.3 title]
+                  * [point 1]
+                  * [point 2]
+                  * [point 3]
+
+                4. return nothing else
+                ### Question: {input}
+                """),
+            },
+        ],
+    )
+
+    return response


-class Issue(BaseModel):
-    "Break down Issue as sub issues"
-    question: str
-
-class IssuePlan(BaseModel):
-    "List of Issue"
-    issue_graph: List[Issue] = []
-
-
-def expandIssue(input) -> IssuePlan:
-    return client.chat.completions.create(
-        model=llm,
-        max_retries=10,
-        messages=[
-            {
-                "role": "system",
-                "content": "As a Mckinsey Consultant create a framework that relevant to the topic, list all issues.",
-            }, {
-                "role": "user",
-                "content": f"Question: {input}",
-            },
-        ],
-        response_model=IssuePlan,
-    )
 def graph(query):
-
-
-
-
+    queryx = expandIssue(query)
+
+    graph = generate_graph(query, str(queryx))
+    return graph.json()
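As a local sanity check (no GROQ_API_KEY or LLM call needed), here is a minimal sketch of why the frozen Node/Edge models let KnowledgeGraph.update() deduplicate on merge. The class definitions are trimmed copies of the ones added above, and the sample nodes and edges are invented for illustration.

from typing import List, Optional
from pydantic import BaseModel, Field

class Node(BaseModel, frozen=True):
    id: int
    label: str
    color: str = "orange"

class Edge(BaseModel, frozen=True):
    source: int
    target: int
    label: str
    color: str = "black"

class KnowledgeGraph(BaseModel):
    nodes: Optional[List[Node]] = Field(default_factory=list)
    edges: Optional[List[Edge]] = Field(default_factory=list)

    def update(self, other: "KnowledgeGraph") -> "KnowledgeGraph":
        # frozen models are hashable, so set() drops repeated nodes/edges
        return KnowledgeGraph(
            nodes=list(set(self.nodes + other.nodes)),
            edges=list(set(self.edges + other.edges)),
        )

g1 = KnowledgeGraph(nodes=[Node(id=1, label="Customer"), Node(id=2, label="Product")],
                    edges=[Edge(source=1, target=2, label="buys")])
g2 = KnowledgeGraph(nodes=[Node(id=2, label="Product"), Node(id=3, label="Channel")],
                    edges=[Edge(source=3, target=2, label="distributes")])

merged = g1.update(g2)
print(merged.model_dump_json(indent=2))  # 3 unique nodes, 2 edges after deduplication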
pii.py
ADDED
@@ -0,0 +1,87 @@
+import os
+
+import instructor
+from groq import Groq
+from pydantic import BaseModel
+
+# Initialize with API key
+client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+
+# Enable instructor patches for Groq client
+client = instructor.from_groq(client)
+"""
+client = instructor.from_openai(
+    OpenAI(
+        base_url="http://localhost:11434/v1",
+        api_key="ollama",
+    ),
+    mode=instructor.Mode.JSON,
+)
+"""
+llm = 'llama-3.1-8b-instant' if os.getenv("GROQ_API_KEY") else "llama3.2"
+
+
+class PIIData(BaseModel):
+    index: int
+    data_type: str
+    pii_value: str
+
+
+class PIIExtraction(BaseModel):
+    """
+    Extracted PII data from a document, all data_types should try to have consistent property names
+    """
+    private_data: list[PIIData]
+    chain_of_thought: str
+
+    def scrub_data(self, content):
+        """
+        Iterates over the private data and replaces the value with a placeholder in the form of
+        <{data_type}_{i}>
+        """
+
+        for i, data in enumerate(self.private_data):
+            content = content.replace(data.pii_value, f"<{data.data_type}_{i}>")
+
+        return content
+
+
+def derisk(content) -> PIIExtraction:
+    return client.chat.completions.create(
+        model=llm,
+        response_model=PIIExtraction,
+        temperature=0.2,
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a world class international PII scrubbing model, perform data preprocess include standardization, stop word removal, punctuation removal...to enhance signal to noise ratio for name, phone, address, email, id...etc. Extract the PII data from the following document",
+
+            }, {
+                "role": "user",
+                "content": content,
+            }
+        ]).model_dump_json(indent=2)
+
+
+if __name__ == '__main__':
+    ESSAY = """
+He Hua (Hua Hua) Director
+
++86-28-83505513
+
+Alternative Address Format:
+Xiongmao Ave West Section, Jinniu District (listed in some records as 610016 postcode)
+
+
+Best Viewing: Before 9:00 AM during summer hours (7:30 AM-5:00 PM)
+
+Caretaker: Tan Jintao ("Grandpa Tan")
+
+Additional Contacts
+Charitable Donations: +86-28-83505513
+Dining Reservations: +86-17311072681
+"""
+
+    print(derisk(ESSAY))
+    # print(pii_leak.model_dump_json(indent=2))
+    # print(pii_leak.scrub_data(ESSAY))
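A possible follow-up to the __main__ block above, in the spirit of the commented-out scrub_data line: since derisk() returns the model serialized with model_dump_json() rather than a PIIExtraction instance, re-validate the JSON to get the model back and reuse scrub_data(). This sketch assumes GROQ_API_KEY is set (so the module-level Groq client initializes) and that the model actually extracts the phone number; the placeholder shown in the comment is only indicative.

from pii import PIIExtraction, derisk

doc = "He Hua (Hua Hua) Director +86-28-83505513"

raw_json = derisk(doc)                                  # JSON string with private_data entries
pii_leak = PIIExtraction.model_validate_json(raw_json)  # back into the Pydantic model
print(pii_leak.scrub_data(doc))                         # e.g. "He Hua (Hua Hua) Director <phone_0>"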