File size: 2,813 Bytes
5ebb309
 
 
 
 
 
 
2ea584d
5ebb309
2ea584d
5ebb309
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from pydantic import BaseModel, Field
from typing import List


import instructor
import os

from groq import Groq
# Build the Groq client from the GROQ_API_KEY environment variable and wrap it
# with instructor in one step; entity_graph() relies on the wrapped client to
# accept `response_model`.
client = instructor.from_groq(Groq(api_key=os.getenv("GROQ_API_KEY")))

# Disabled alternative: an OpenAI-compatible local endpoint in JSON mode.
# It is kept as an inert string literal, so none of it executes.
"""
import openai
client = instructor.from_openai(
    openai.OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama",
    ),
    mode=instructor.Mode.JSON,
)
"""

# Model name sent with every request: the Groq-hosted model when GROQ_API_KEY
# is set, otherwise "qwen2.5".
# NOTE(review): `client` above is always the Groq client, so the "qwen2.5"
# fallback presumably pairs with the disabled snippet -- confirm intent.
if os.getenv("GROQ_API_KEY"):
    llm = 'llama-3.1-8b-instant'
else:
    llm = "qwen2.5"

class Property(BaseModel):
    # One key/value attribute attached to an extracted entity.
    # Documented with `#` comments rather than a docstring on purpose:
    # pydantic copies a model docstring into the generated JSON schema.

    # Attribute name; the examples on Entity.properties use "Amount" and "Date".
    key: str
    # Presumably the value as it appears in the document; the examples use
    # relative forms such as "+200" and "-5" -- TODO confirm.
    value: str
    # Resolved counterpart of `value`; the examples use "300" and "2018-09-18".
    resolved_absolute_value: str


class Entity(BaseModel):
    # A single entity extracted from the document, with its properties and the
    # ids of other entities it depends on.
    # Every field passes `...` as the default, which marks it as required.
    # Documented with `#` comments rather than a docstring on purpose:
    # pydantic copies a model docstring into the generated JSON schema.

    # Integer identifier; per its description it is meant to be unique and is
    # used for deduplication. Other entities refer to it via `dependencies`.
    id: int = Field(
        ...,
        description="Unique identifier for the entity, used for deduplication, design a scheme allows multiple entities",
    )
    # List of strings; the description asks for the resolved value together
    # with a few surrounding words of context.
    subquote_string: List[str] = Field(
        ...,
        description="Correctly resolved value of the entity, if the entity is a reference to another entity, this should be the id of the referenced entity, include a few more words before and after the value to allow for some context to be used in the resolution",
    )
    # Required plain string; no description is supplied for this field.
    entity_title: str
    # Attributes of the entity as Property objects.
    # NOTE(review): `examples` holds individual Property objects although the
    # field type is List[Property] -- confirm whether each example should
    # itself be a list.
    properties: List[Property] = Field(
        ..., description="List of properties of the entity such as date, amount...etc", examples=[Property(key="Amount", value="+200", resolved_absolute_value="300"),
                                                                                                  Property(key="Date", value="-5", resolved_absolute_value="2018-09-18")]
    )
    # Ids (see `id` above) of the entities this one depends on.
    dependencies: List[int] = Field(
        ...,
        description="List of entity ids that this entity depends  or relies on to resolve it",
    )


class DocumentExtraction(BaseModel):
    # Top-level result model: the list of entities extracted from one document.
    # entity_graph() passes this class as `response_model`.
    # Documented with `#` comments rather than a docstring on purpose:
    # pydantic copies a model docstring into the generated JSON schema.

    # Required list of Entity objects.
    # NOTE(review): the description mentions "a body and a list of sources",
    # which are not fields of Entity as defined in this file -- confirm wording.
    entities: List[Entity] = Field(
        ...,
        description="Body of the answer, each fact should be a separate object with a body and a list of sources such as Organization, Agreement Date, Asset...etc",
    )


def entity_graph(content) -> DocumentExtraction:
    """Send a document to the configured model and return the parsed entities.

    Args:
        content: Document text; passed unchanged as the user message.

    Returns:
        The value returned by ``client.chat.completions.create`` when called
        with ``response_model=DocumentExtraction``.
    """
    system_prompt = "You're world class entities resolution system. Ensure that each entity and its attributes are correctly resolved, meaning duplicates are merged and dependencies are established. Extract and resolve a list of entities from the following document:"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": content},
    ]
    # `llm` and `client` are the module-level model name and patched client.
    extraction = client.chat.completions.create(
        model=llm,
        response_model=DocumentExtraction,
        temperature=0.1,
        messages=conversation,
    )
    return extraction

def resolve(content):
    """Extract entities from ``content`` and return them as a JSON string.

    Delegates to ``entity_graph`` and serialises the resulting model with
    ``model_dump_json(indent=2)``.
    """
    return entity_graph(content).model_dump_json(indent=2)

if __name__ == "__main__":
    # Script entry point: run the pipeline on an empty placeholder document
    # and print the resulting JSON.
    print(resolve(""))