import os

import gradio as gr
from pymongo.mongo_client import MongoClient
from pymongo.operations import SearchIndexModel

from llama_index.core import Settings, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch

# 4-bit GGUF chat model, downloaded from Hugging Face on first run.
model_url = "https://huggingface.co/georgesung/llama3_8b_chat_uncensored/resolve/main/llama3_8b_chat_uncensored_q4_0.gguf"

llm = LlamaCPP(
    model_url=model_url,
    model_path=None,  # no local file; fetch from model_url instead
    temperature=0.01,
    max_new_tokens=256,
    context_window=3900,
    generate_kwargs={},
    model_kwargs={"n_gpu_layers": 1},  # offload one layer to the GPU if available
    verbose=True,
)
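
# LlamaCPP applies a generic prompt template unless formatters are supplied.
# llama_index ships Llama-2-style helpers in llama_utils; whether they match
# this Llama-3-based GGUF's chat template is an assumption to verify. Sketch:
#
# from llama_index.llms.llama_cpp.llama_utils import (
#     messages_to_prompt,
#     completion_to_prompt,
# )
#
# llm = LlamaCPP(
#     model_url=model_url,
#     temperature=0.01,
#     max_new_tokens=256,
#     context_window=3900,
#     model_kwargs={"n_gpu_layers": 1},
#     messages_to_prompt=messages_to_prompt,
#     completion_to_prompt=completion_to_prompt,
#     verbose=True,
# )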

# Global LlamaIndex defaults: embedding model, chunking, and generation limits.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")

Settings.llm = llm
Settings.embed_model = embed_model
Settings.node_parser = SentenceSplitter(chunk_size=1024)
Settings.num_output = 256
Settings.context_window = 3900

# MongoDB Atlas connection details come from the environment.
MONGO_URI = os.getenv("MONGO_URI")
os.environ["MONGODB_URI"] = MONGO_URI  # mirrored for libraries that read MONGODB_URI
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

mongo_client = MongoClient(MONGO_URI)
collection = mongo_client[DB_NAME][COLLECTION_NAME]
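
# Fail fast if the cluster is unreachable (standard PyMongo ping pattern;
# this check is an addition, not part of the original flow).
try:
    mongo_client.admin.command("ping")
except Exception as exc:
    raise RuntimeError("Could not connect to MongoDB Atlas") from exc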

# Wrap the Atlas collection as a LlamaIndex vector store. "default" must match
# the name of the vector search index defined on the collection.
vector_store = MongoDBAtlasVectorSearch(
    mongo_client,
    db_name=DB_NAME,
    collection_name=COLLECTION_NAME,
    vector_index_name="default",
)
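
# The SearchIndexModel import suggests the Atlas index is created from code.
# A minimal one-off sketch, assuming the store's default "embedding" field and
# the 384-dimensional vectors produced by BAAI/bge-small-en:
#
# search_index = SearchIndexModel(
#     definition={
#         "fields": [
#             {
#                 "type": "vector",
#                 "path": "embedding",
#                 "numDimensions": 384,
#                 "similarity": "cosine",
#             }
#         ]
#     },
#     name="default",
#     type="vectorSearch",
# )
# collection.create_search_index(model=search_index)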

# Attach to the existing index; documents are assumed to be ingested already.
index = VectorStoreIndex.from_vector_store(vector_store)
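
# The original imports included StorageContext, pointing at an ingestion step
# that lives elsewhere. A hedged sketch of that step, assuming a `documents`
# list loaded with a reader of your choice:
#
# from llama_index.core import StorageContext
#
# storage_context = StorageContext.from_defaults(vector_store=vector_store)
# index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)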


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    top_k,
):
    # Rebuild the conversation as chat messages. Note that the retrieval query
    # below does not consume this list; the sketch after this function shows
    # one way to wire the history in.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Retrieve the top-k most similar chunks and answer from them.
    # Gradio sliders can return floats, so cast before passing to LlamaIndex.
    query_engine = index.as_query_engine(similarity_top_k=int(top_k))
    response = query_engine.query(message)

    # Gradio expects a string, not a LlamaIndex Response object.
    return str(response)
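
# Inside respond(), the collected `messages` could actually reach the model by
# swapping the query engine for LlamaIndex's context chat engine. A hedged
# sketch, not part of the original flow:
#
# from llama_index.core.llms import ChatMessage
#
# chat_engine = index.as_chat_engine(
#     chat_mode="context",
#     system_prompt=system_message,
#     similarity_top_k=int(top_k),
# )
# chat_history = [
#     ChatMessage(role=m["role"], content=m["content"]) for m in messages[1:]
# ]
# response = chat_engine.chat(message, chat_history=chat_history)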


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="What is your question?", label="System message"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k"),
    ],
)


if __name__ == "__main__":
    demo.launch()