ai / knowledge.py
kevinhug's picture
image correct
73b7507
raw
history blame
7.29 kB
from typing import List
import instructor
from graphviz import Digraph
from pydantic import BaseModel, Field
import os
from datetime import date
from groq import Groq
# Build the Groq client from GROQ_API_KEY and wrap it with instructor in one
# step, so `client.chat.completions.create` accepts `response_model=`.
client = instructor.from_groq(Groq(api_key=os.getenv("GROQ_API_KEY")))

# Disabled alternative kept for reference: an OpenAI-compatible client pointed
# at a local Ollama endpoint.
#
# from openai import OpenAI
# client = instructor.from_openai(
#     OpenAI(
#         base_url="http://localhost:11434/v1",
#         api_key="ollama",
#     ),
#     mode=instructor.Mode.JSON,
# )

# Model name used for every completion request in this module.
# NOTE(review): when GROQ_API_KEY is unset the name falls back to "qwen2.5",
# but `client` above is still the Groq client -- presumably the Ollama client
# is meant to be enabled by hand in that case; confirm.
if os.getenv("GROQ_API_KEY"):
    llm = 'llama-3.1-8b-instant'
else:
    llm = "qwen2.5"
class Node(BaseModel, frozen=True):
    """
    Node representing concept in the subject domain
    """

    # The class docstring and the Field descriptions below are exposed through
    # the model's JSON schema, so their wording is kept exactly as written.
    # frozen=True makes instances immutable and hashable.

    # Integer identifier of the concept.
    id: int = Field(
        description="unique id of the concept in the subject domain, used for deduplication, design a scheme allows multiple concept",
    )
    # Human-readable text for the concept; used as the node label when drawn.
    label: str = Field(
        description="description of the concept in the subject domain",
    )
    # Colour name handed to graphviz when the node is drawn.
    color: str = "orange"
    # Required date attached to the node.
    record_date: date = Field(
        description="the date that this Node is recorded",
    )
class Edge(BaseModel, frozen=True):
    """
    Edge representing relationship between concepts in the subject domain, source depends on target
    """

    # The class docstring and the Field descriptions below are exposed through
    # the model's JSON schema, so their wording is kept exactly as written.
    # frozen=True makes instances immutable and hashable.

    # Integer id of the node the edge starts from.
    source: int = Field(
        description="source representing concept in the subject domain",
    )
    # Integer id of the node the edge points to.
    target: int = Field(
        description="target representing concept in the subject domain",
    )
    # Text describing the relationship; used as the edge label when drawn.
    label: str = Field(
        description="description representing relationship between concepts in the subject domain",
    )
    # Colour name handed to graphviz when the edge is drawn.
    color: str = "black"
from typing import Optional
class KnowledgeGraph(BaseModel):
    """
    KnowledgeGraph is graph representation of concepts in the subject domain
    """

    # Both fields default to an empty list. Optional is kept so a payload that
    # carries an explicit null still validates; the methods below treat None
    # as "no items". The former `Field(..., default_factory=list)` declared
    # the field as required and defaulted at the same time.
    nodes: Optional[List[Node]] = Field(default_factory=list)
    edges: Optional[List[Edge]] = Field(default_factory=list)

    def update(self, other: "KnowledgeGraph") -> "KnowledgeGraph":
        """Return a new graph that merges ``other`` into this one.

        Neither operand is modified.

        * Nodes are deduplicated by ``id`` (the field declared as the
          deduplication key); when both graphs carry the same id, the node
          already present in ``self`` is kept.
        * Edges are deduplicated by full equality.
        * First-seen order is preserved, so the result is deterministic.
        * ``None`` for ``nodes``/``edges`` on either side is treated as empty.
        """
        nodes_by_id = {}
        for node in (self.nodes or []) + (other.nodes or []):
            # setdefault keeps the first node seen for an id.
            nodes_by_id.setdefault(node.id, node)

        # dict.fromkeys drops duplicates while keeping insertion order
        # (Edge is frozen and therefore hashable).
        unique_edges = dict.fromkeys((self.edges or []) + (other.edges or []))

        return KnowledgeGraph(
            nodes=list(nodes_by_id.values()),
            edges=list(unique_edges),
        )

    def draw(self, prefix: str = "knowledge_graph", view: bool = True):
        """Render the graph to a PNG with graphviz.

        Args:
            prefix: Path prefix passed to ``Digraph.render`` as the output
                file name.
            view: Forwarded to ``Digraph.render``; defaults to True, which
                matches the previous hard-coded behaviour. Pass False to
                render without opening a viewer.
        """
        dot = Digraph(comment="Knowledge Graph")
        for node in self.nodes or []:
            dot.node(str(node.id), node.label, color=node.color)
        for edge in self.edges or []:
            dot.edge(
                str(edge.source), str(edge.target), label=edge.label, color=edge.color
            )
        dot.render(prefix, format="png", view=view)
from typing import Iterable
from textwrap import dedent
def generate_graph(q, input=None) -> KnowledgeGraph:
    """Extend a knowledge graph with concepts extracted from an interaction.

    The interaction text and the JSON dump of the current graph are placed in
    a single user prompt; the completion is requested with
    ``response_model=KnowledgeGraph`` and up to 5 retries, and the parsed
    result is merged into ``input`` via ``KnowledgeGraph.update``.

    Args:
        q: Interaction text to analyse.
        input: Existing graph to extend. ``None`` (the default) starts from
            an empty graph.

    Returns:
        The graph produced by ``input.update(<model response>)``.
    """
    # A fresh empty graph is created per call; the previous default
    # `input=KnowledgeGraph()` was a single mutable instance built at import
    # time and shared by every call that omitted the argument.
    if input is None:
        input = KnowledgeGraph()

    state = client.chat.completions.create(
        model=llm,
        max_retries=5,
        messages=[
            {
                "role": "user",
                "content": dedent(f"""As a world class iterative knowledge graph builder and a Marketing Data Scientist for delivery personalized solution in Personal and Commercial Banking. Help me understand this person pain points and needs by describing the interaction as a detailed knowledge graph:
### Interaction: {q}
### Merge from existing KnowledgeGraph, Here is the current state of the graph:
{input.model_dump_json()}
### Instruction:
Generate at least 2 concepts
Generate at least 2 relationships
Append them into current state without duplication
### Output Format:
Node with id, label for description of the concept
Edge with source's id, target's id, label for description of the relationship between source's concept and target's concept
"""),
            }
        ],
        response_model=KnowledgeGraph)
    return input.update(state)
class Subissue(BaseModel):
    # One sub-issue of a decomposed question: a title plus its bullet points.
    # Documented with comments rather than a class docstring so the schema
    # generated from this model is left unchanged.

    # Title of the sub-issue (required).
    subissue_title: str
    # Bullet points for the sub-issue; a new empty list when omitted.
    point: List[str] = Field(
        description="Specific aspect or component of the subissue",
        default_factory=list,
    )
def expandIssue(input) -> Iterable[Subissue]:
    """Request a MECE decomposition of a question as Subissue items.

    Args:
        input: The question text interpolated into the prompt.

    Returns:
        Whatever ``client.chat.completions.create`` yields for
        ``response_model=Iterable[Subissue]`` (temperature 0.1, up to 3
        retries).
    """
    prompt = dedent(f"""
As a McKinsey Consultant, perform MECE decomposition of the question.
### Requirements
1. Return 3 subissues minimum
2. Each sub-issue has 3 bullet points, which each new point beginning with a *
3. Use EXACT format:
- [Sub-issue 1.1 title]
* [point 1]
* [point 2]
* [point 3]
- [Sub-issue 1.2 title]
* [point 1]
* [point 2]
* [point 3]
- [Sub-issue 1.3 title]
* [point 1]
* [point 2]
* [point 3]
4. return nothing else
### Question: {input}
""")
    user_message = {"role": "user", "content": prompt}
    return client.chat.completions.create(
        messages=[user_message],
        response_model=Iterable[Subissue],
        model=llm,
        temperature=0.1,
        max_retries=3,
    )
def graph(query, queryx):
    """Build or extend a knowledge graph and return it as indented JSON.

    Args:
        query: Interaction text passed to ``generate_graph``.
        queryx: JSON string of an existing ``KnowledgeGraph`` to extend.
            ``None``, an empty string, or a whitespace-only string means
            "start from an empty graph".

    Returns:
        The resulting graph serialised with ``model_dump_json(indent=2)``.
    """
    # Guard against None as well as blank text: calling .strip() on None
    # would raise AttributeError.
    if queryx is None or not queryx.strip():
        result = generate_graph(query)
    else:
        result = generate_graph(query, KnowledgeGraph.model_validate_json(queryx))
    # The local is named `result` so it does not shadow this function's name.
    return result.model_dump_json(indent=2)
if __name__ == "__main__":
    # Manual smoke run: build a graph from a sample call-centre transcript,
    # extend it with a second interaction, then render the merged graph.
    query = """
Representative: "Thank you for calling Goldman Sachs Credit Card Services. My name is Sarah. May I have your full name and the last 4 digits of your card number for verification?"
Customer: "This is Michael Chen, card ending 5402."
Representative: "Thank you, Mr. Chen. I show you have an Apple Card account opened in 2023. How can I assist you today?" (Reference: Search Result Apple Card context)
Customer: "I'm disputing a $329 charge from TechElectronics from March 15th. I never received the item."
Representative: "I understand your concern. Let me initiate a dispute investigation. Per our process (Search Result BBB complaint handling):
We'll apply a temporary credit within 24 hours
Our team will contact the merchant
You'll receive email updates at [email protected]
Final resolution within 60 days
Would you like me to proceed?"
Customer: "Yes, but what if they fight it?"
Representative: "If the merchant disputes your claim, we'll:
Review all evidence using our 3-phase verification system (Search Result multi-stage investigation)
Consider your purchase protection benefits
Escalate to senior specialists if needed
For security, never share your CVV (339) or full card number with callers. Always call back using the number on your physical card (Search Result scam warning)."
Customer: "Can I make a partial payment on my balance while this is pending?"
Representative: "Absolutely. We offer:
"minimum_payment": "$35 due April 25",
"hardship_program": "0% APR for 6 months",
"custom_plan": "Split $600 balance over 3 months"
Would you like to enroll in any of these?"
Customer: "The 3-month plan, please."
Representative: "Confirmed. Your next payment of $200 will process May 1st. A confirmation email with dispute case #GS-2025-0422-8830 is being sent now. Is there anything else?"
Customer: "No, thank you."
"""
    # Named first_graph/merged_graph rather than `graph`, so the module-level
    # function `graph` is not rebound to a KnowledgeGraph instance.
    first_graph = generate_graph(query)
    merged_graph = generate_graph("My mortgage rate is 9%, I cannot afford it anymore, I need to refinance and I'm unemploy right now.", first_graph)
    merged_graph.draw()