|
import logging
import os
import sys

from flask import Flask, Response, jsonify, request

# Flask application exposing the /chat endpoint.
app = Flask(__name__)

# Log to stdout so container/platform log collectors pick everything up.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
|
|
|
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    Document,
)
from llama_index.readers.file import PDFReader

# Global LlamaIndex defaults: chat model and embedding model shared by
# every component built below.
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")

# Local folder the source documents are synced into.
directory_path = "documentos"

# return_full_document=True keeps each PDF as one Document instead of
# one Document per page.
file_extractor = {".pdf": PDFReader(return_full_document=True)}
|
|
from drive_downloader import GoogleDriveDownloader

# Google Drive folder that holds the source documents.
folder_id = "1n34bmh9rlbOtCvE_WPZRukQilKeabWsN"
local_path = directory_path

# Pull the remote folder's contents into the local documents directory
# before indexing.
GoogleDriveDownloader().download_from_folder(folder_id, local_path)
|
|
|
|
# Read every file under the documents folder (recursively); file names
# double as document ids so re-runs update rather than duplicate.
documents = SimpleDirectoryReader(
    input_dir=directory_path,
    file_extractor=file_extractor,
    filename_as_id=True,
    recursive=True,
).load_data()

from document_creator import create_single_document_with_filenames

# Append one synthetic document listing the file names, so the corpus
# itself can answer "which documents exist" style questions.
filenames_doc = create_single_document_with_filenames(directory_path=directory_path)
documents.append(filenames_doc)
|
|
|
|
|
|
|
|
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.storage.docstore import SimpleDocumentStore

# Chunk the documents into ~1024-token nodes with a 128-token overlap.
sentence_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=128)
nodes = sentence_splitter.get_nodes_from_documents(documents)

# In-memory docstore holding the chunked nodes; also consumed later by
# the BM25 retriever and persisted to disk.
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)
|
|
|
|
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

# Dense vectors live in a persistent on-disk Chroma collection.
chroma_client = chromadb.PersistentClient(path="chroma_db")
chroma_collection = chroma_client.get_or_create_collection("dense_vectors")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

storage_context = StorageContext.from_defaults(
    docstore=docstore,
    vector_store=vector_store,
)

# Embed the nodes and build the dense index (progress bar enabled).
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context, show_progress=True)

# Persist the docstore so the node set can be reloaded without
# re-reading and re-chunking the source documents.
storage_context.docstore.persist("./docstore.json")
|
|
|
|
# Dense (embedding) retriever over the vector index.
index_retriever = index.as_retriever(similarity_top_k=2)

import nest_asyncio

# Permit nested event loops — the fusion retriever below runs async
# work even when called from synchronous code.
nest_asyncio.apply()

from llama_index.retrievers.bm25 import BM25Retriever

# Sparse keyword (BM25) retriever over the same nodes, with Portuguese
# stemming/stopwords.
bm25_retriever = BM25Retriever.from_defaults(
    docstore=index.docstore,
    similarity_top_k=2,
    language="portuguese",
    verbose=True,
)
|
|
|
|
from llama_index.core.retrievers import QueryFusionRetriever

# Hybrid retrieval: merge dense and BM25 results with reciprocal-rank
# fusion. num_queries=1 means no extra query generation — only the
# user's original query is used.
retriever = QueryFusionRetriever(
    [index_retriever, bm25_retriever],
    num_queries=1,
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
)
|
|
|
|
|
|
from llama_index.core.memory import ChatMemoryBuffer
from mysqlchatstore import MySQLChatStore

# Conversation history is persisted in MySQL; all connection settings
# come from environment variables.
chat_store = MySQLChatStore.from_params(
    host=os.getenv("MYSQL_HOST"),
    port=os.getenv("MYSQL_PORT"),
    user=os.getenv("MYSQL_USER"),
    password=os.getenv("MYSQL_PASSWORD"),
    database=os.getenv("MYSQL_DATABASE"),
    table_name=os.getenv("MYSQL_TABLE"),
)

# Rolling window over the stored history, capped at ~3000 tokens.
# NOTE(review): the key "Sicoob" is global — every caller shares one
# conversation; confirm that is intended.
chat_memory = ChatMemoryBuffer.from_defaults(
    token_limit=3000,
    chat_store=chat_store,
    chat_store_key="Sicoob",
)
|
|
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.chat_engine import CondensePlusContextChatEngine

query_engine = RetrieverQueryEngine.from_args(retriever)

# Chat engine: condenses the chat history plus the new message into a
# standalone question, retrieves context for it, then answers. The
# prompt (Portuguese) also tells the model to append the source
# document's name between || markers.
chat_engine = CondensePlusContextChatEngine.from_defaults(
    query_engine,
    memory=chat_memory,
    context_prompt=(
        "Você é um assistente virtual capaz de interagir normalmente, além de"
        " fornecer informações sobre organogramas e listar funcionários."
        " Aqui estão os documentos relevantes para o contexto:\n"
        "{context_str}"
        "\nInstrução: Use o histórico da conversa anterior, ou o contexto acima, para responder."
        "No final da resposta, depois de uma quebra de linha escreva o nome do documento que contém a informação entre dois ||, como ||Documento Nome||"
    ),
)
|
|
|
|
|
|
|
|
@app.route("/chat", methods=["POST"])
def chat():
    """Stream a chat answer for the user's message.

    Expects a JSON body like {"message": "..."} and returns the model's
    answer as a plain-text token stream. Responds 400 with a JSON error
    when the body is missing, not valid JSON, or the message is empty.
    """
    # Bug fix: bare `request.json` raises an unhandled BadRequest (HTML
    # error page) when the body is absent, malformed, or not
    # application/json. silent=True returns None instead, so we can
    # answer with this route's own JSON 400.
    payload = request.get_json(silent=True) or {}
    user_input = payload.get("message", "")
    if not user_input:
        return jsonify({"error": "Mensagem vazia"}), 400

    def generate_response():
        # Errors are surfaced in-band: once streaming starts the 200
        # status/headers are already sent, so we cannot switch to an
        # error response mid-stream.
        try:
            response = chat_engine.stream_chat(user_input)
            for token in response.response_gen:
                yield token
        except Exception as e:
            logging.exception("Erro ao gerar resposta em streaming")
            yield f"Erro: {str(e)}"

    return Response(generate_response(), content_type="text/plain")
|
|
if __name__ == "__main__":
    # Development server; debug stays off so the reloader does not
    # re-execute the heavy download/index build at module import.
    app.run(port=5001, debug=False)
|
|
|