File size: 3,271 Bytes
13fbd2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os

from langchain_community.embeddings.ollama import OllamaEmbeddings
from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader
from llama_index.core import SummaryIndex
from llama_index.core import VectorStoreIndex
from llama_index.core import load_index_from_storage, StorageContext
from llama_index.core.agent import ReActAgent
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.objects import ObjectIndex
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.ollama import Ollama

# Configure the global LLM and embedding model (both served by local Ollama).
Settings.llm = Ollama(model="llama3")
Settings.embed_model = OllamaEmbeddings(model="llama3")

# Load each PDF into its own document list, keyed by the file's base name
# (e.g. "./alice.pdf" -> "alice").
docs = ["./alice.pdf", "./ReAct.pdf"]
topic_docs = {}
for doc in docs:
    # os.path handles any path shape; the previous split(".")[1] hack broke
    # on filenames with extra dots or paths without a leading "./".
    doc_name = os.path.splitext(os.path.basename(doc))[0]
    topic_docs[doc_name] = SimpleDirectoryReader(input_files=[doc]).load_data()

node_parser = SentenceSplitter()
agents = {}         # doc name -> per-document ReAct agent
query_engines = {}  # doc name -> plain vector query engine (top-2 chunks)

all_nodes = []
for doc_id, document in topic_docs.items():
    # Split this document into sentence-level nodes; keep a flat list of
    # every node across all documents for the baseline index built later.
    nodes = node_parser.get_nodes_from_documents(document)
    all_nodes.extend(nodes)

    # Vector index is cached on disk: build and persist on first run,
    # reload from storage afterwards.
    persist_dir = f"./{doc_id}"
    if not os.path.exists(persist_dir):
        vector_index = VectorStoreIndex(nodes=nodes, show_progress=True)
        vector_index.storage_context.persist(persist_dir=persist_dir)
    else:
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=persist_dir)
        )

    # NOTE(review): unlike the vector index, the summary index is rebuilt
    # and re-persisted on every run — consider the same cache-on-disk
    # pattern if startup time matters.
    summary_index = SummaryIndex(nodes=nodes, show_progress=True)
    summary_index.storage_context.persist(persist_dir=f"./summary-{doc_id}")

    vector_query_engine = vector_index.as_query_engine()
    summary_query_engine = summary_index.as_query_engine()

    # Two tools per document: targeted retrieval vs. holistic summary.
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name="vector_tool",
                description=f"Useful for specific aspects of {doc_id}"
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name="summary_tool",
                description=f"Useful for any request that require a holistic summary about {doc_id}"
            )
        )
    ]

    # One ReAct agent per document, choosing between its two tools.
    agent = ReActAgent.from_tools(
        query_engine_tools,
        llm=Settings.llm,
        verbose=True,
    )

    agents[doc_id] = agent
    query_engines[doc_id] = vector_index.as_query_engine(similarity_top_k=2)

# Wrap each per-document agent as a retrievable tool for the top-level agent.
all_tools = []
for key in topic_docs:
    print(f"Processing {key}")
    print("-------------------------------------")
    # The two fragments previously concatenated with no separator,
    # producing "...about aliceUse this tool..." in the prompt.
    summary = (
        f"This content contains info about {key}. "
        f"Use this tool if want to answer any question about {key}."
    )
    doc_tool = QueryEngineTool(
        query_engine=agents[key],
        metadata=ToolMetadata(
            name=f"tool_{key}",
            description=summary
        ),
    )
    all_tools.append(doc_tool)

# Index the per-document tools so the top-level agent can retrieve the
# single most relevant tool for each query (similarity_top_k=1).
# ObjectIndex is imported at the top of the file with the other imports.
obj_index = ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex,
)

# Top-level ReAct agent: routes each question to the best document agent
# via the object-index retriever; falls back to Settings.llm by default.
top_agent = ReActAgent.from_tools(
    tool_retriever=obj_index.as_retriever(similarity_top_k=1),
    verbose=True,
)

# Baseline for comparison: one flat vector index over every node from all
# documents, queried directly without any agent routing.
base_index = VectorStoreIndex(all_nodes)
base_query_engine = base_index.as_query_engine(similarity_top_k=4)

# Run a sample question through the top-level agent and show the answer
# (the original computed the response but never displayed it).
response = top_agent.query("Why did Alice run after the rabbit?")
print(response)