# SexBot / multi_doc_agent.py
# Uploaded to the Hugging Face Hub by Pew404 via huggingface_hub (commit 13fbd2e, verified)
from llama_index.core import VectorStoreIndex
from llama_index.core import SimpleDirectoryReader
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.llms.ollama import Ollama
from langchain_community.embeddings.ollama import OllamaEmbeddings
from llama_index.core import load_index_from_storage, StorageContext
from llama_index.core.node_parser import SentenceSplitter
import os
from llama_index.core import Settings
# Route all llama_index LLM calls through a locally served Ollama "llama3" model.
Settings.llm = Ollama(model="llama3")
# NOTE(review): this assigns a *LangChain* embeddings object to llama_index's
# Settings.embed_model. Recent llama_index versions appear to wrap LangChain
# embeddings on assignment, but this is a cross-framework seam — confirm it
# works with the pinned versions, or prefer
# llama_index.embeddings.ollama.OllamaEmbedding to stay within one framework.
Settings.embed_model = OllamaEmbeddings(model="llama3")
docs = ["./alice.pdf", "./ReAct.pdf"]


def _doc_stem(path):
    """Return the file name without directory or extension ('./alice.pdf' -> 'alice')."""
    return os.path.splitext(os.path.basename(path))[0]


# Map each document's stem (used as its topic id throughout the script)
# to the list of Document pages loaded from it.
topic_docs = {}
for doc in docs:
    # Fix: the original `doc.split(".")[1].split("/")[-1]` only works for
    # paths of the exact form "./name.ext" — it returns the extension for
    # "name.ext" and breaks on dotted file names. splitext/basename handle
    # all of these correctly.
    topic_docs[_doc_stem(doc)] = SimpleDirectoryReader(input_files=[doc]).load_data()
node_parser = SentenceSplitter()  # fix: was misspelled "node_paser"

# Per-document ReAct agents and vector query engines, keyed by document stem.
agents = {}
query_engines = {}
all_nodes = []  # accumulated across documents for the global base_index below

for doc_id, document in topic_docs.items():  # fix: "id" shadowed the builtin
    nodes = node_parser.get_nodes_from_documents(document)
    all_nodes.extend(nodes)

    # Build the vector index once and persist it; reload from disk on later runs.
    if not os.path.exists(f"./{doc_id}"):
        vector_index = VectorStoreIndex(nodes=nodes, show_progress=True)
        vector_index.storage_context.persist(persist_dir=f"./{doc_id}")
    else:
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=f"./{doc_id}")
        )

    # NOTE(review): unlike the vector index, the summary index is rebuilt and
    # re-persisted on every run — confirm this asymmetry is intentional.
    summary_index = SummaryIndex(nodes=nodes, show_progress=True)
    summary_index.storage_context.persist(persist_dir=f"./summary-{doc_id}")

    vector_query_engine = vector_index.as_query_engine()
    summary_query_engine = summary_index.as_query_engine()

    # Two tools per document: targeted retrieval vs. holistic summary.
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name="vector_tool",
                description=f"Useful for specific aspects of {doc_id}",
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name="summary_tool",
                description=f"Useful for any request that require a holistic summary about {doc_id}",
            ),
        ),
    ]

    # One ReAct agent per document that chooses between the two tools above.
    agent = ReActAgent.from_tools(
        query_engine_tools,
        llm=Settings.llm,
        verbose=True,
    )
    agents[doc_id] = agent
    query_engines[doc_id] = vector_index.as_query_engine(similarity_top_k=2)
# Wrap each per-document agent as a tool so a top-level router can dispatch to it.
all_tools = []
for key, docu in topic_docs.items():
    print(f"Processing {key}")
    print("-------------------------------------")
    # Fix: the two fragments previously joined with no separator, producing
    # "...info about aliceUse this tool..."; also "if want to" was ungrammatical.
    # This text is the tool description the router LLM reads, so it matters.
    summary = (
        f"This content contains info about {key}. "
        f"Use this tool if you want to answer any question about {key}."
    )
    doc_tool = QueryEngineTool(
        query_engine=agents[key],
        metadata=ToolMetadata(
            name=f"tool_{key}",
            description=summary,
        ),
    )
    all_tools.append(doc_tool)
from llama_index.core.objects import ObjectIndex

# Index the document tools themselves so the top-level agent can retrieve
# the single most relevant one per question instead of seeing every tool.
obj_index = ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex,
)
tool_retriever = obj_index.as_retriever(similarity_top_k=1)

# Top-level router agent: retrieves one document tool, then delegates to it.
top_agent = ReActAgent.from_tools(
    tool_retriever=tool_retriever,
    verbose=True,
)
# Global engine over every node from every document — a flat-retrieval
# baseline that bypasses the per-document agent routing above.
base_index = VectorStoreIndex(all_nodes)
base_query_engine = base_index.as_query_engine(similarity_top_k=4)

# Fix: the response was computed but silently discarded; surface it.
response = top_agent.query("Why did Alice run after the rabbit?")
print(response)