Update app.py
app.py CHANGED
@@ -1,44 +1,29 @@
+# version for gradio
 import gradio as gr
 from llama_index.llms.llama_cpp import LlamaCPP
-from llama_index.core import VectorStoreIndex, StorageContext
 from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
 import pymongo
 from pymongo.mongo_client import MongoClient
 from pymongo.operations import SearchIndexModel
 from llama_index.core import VectorStoreIndex, StorageContext
+
+from llama_index.core.vector_stores.types import VectorStoreQuery
+
+from langchain_nomic.embeddings import NomicEmbeddings
 import os
 ###### load LLM
-
-
-
-# You can pass in the URL to a GGML model to download it automatically
-model_url=model_url,
-# optionally, you can set the path to a pre-downloaded model instead of model_url
-model_path=None,
-temperature=0.01,
-max_new_tokens=256, # could be larger but requires more time
-# llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
-context_window=3900,
-# kwargs to pass to __call__()
-generate_kwargs={},
-# kwargs to pass to __init__()
-# set to at least 1 to use GPU
-model_kwargs={"n_gpu_layers": 1},
-verbose=True,
-)
-# load embedding model
-# sentence transformers
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+os.system("ollama pull llama3.2:3b-instruct-fp16")
+# LLM
+from langchain_ollama import ChatOllama
 
-
-
+local_llm = "llama3.2:3b-instruct-fp16"
+llm = ChatOllama(model=local_llm, temperature=0)
+llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")
 
-
-
-
-
-Settings.num_output = 256
-Settings.context_window = 3900
+# load embedding model
+# sentence transformers
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+embed_model = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")
 
 # Load vector database
 
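This hunk drops the in-process LlamaCPP configuration and the HuggingFace embedder in favor of a local Ollama model (pulled at startup via `os.system`) and Nomic embeddings in local inference mode. Below is a minimal sketch of how the two new pieces behave on their own, assuming a running `ollama` daemon with the model already pulled; the sample strings are illustrative only:

```python
# Hedged sketch: exercising the Ollama chat model and Nomic embedder
# introduced by this change (not part of app.py itself).
from langchain_ollama import ChatOllama
from langchain_nomic.embeddings import NomicEmbeddings

llm = ChatOllama(model="llama3.2:3b-instruct-fp16", temperature=0)
embed_model = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")

# invoke() returns an AIMessage; the generated text lives in .content
reply = llm.invoke("Answer in one sentence: what is retrieval-augmented generation?")
print(reply.content)

# embed_query() returns a list of floats for a single string
vector = embed_model.embed_query("what is retrieval-augmented generation?")
print(len(vector))
```

The `llm_json_mode` variant (`format="json"`) constrains Ollama to emit JSON output, which suits the router-style component hinted at by the `### Router` comment in the next hunk.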
@@ -52,10 +37,39 @@ collection = mongo_client[DB_NAME][COLLECTION_NAME]
 #
 
 vector_store = MongoDBAtlasVectorSearch(mongo_client, db_name=DB_NAME, collection_name=COLLECTION_NAME, vector_index_name="default")
+#
+# print(query_results.nodes[0].text)
+
+# COMPONENT
+### Router
+import json
+from langchain_core.messages import HumanMessage, SystemMessage
+
+# Prompt
+### Generate
 
-#
-
+# Prompt
+rag_prompt = """Você é um assistente multilíngue para tarefas de resposta a perguntas.
 
+Aquí está o contexto a ser usado para responder à pergunta:
+
+{context}
+
+Pense cuidadosamente acerca do contexto de acima.
+
+Agora, revise a pergunta do usuario:
+
+{question}
+
+Forneça uma resposta a essas perguntas usando apenas o contexto acima.
+
+Mantenha sua resposta formal e concisa.
+
+Resposta:"""
+
+# Post-processing
+def format_docs(nodes):
+    return "\n\n".join(doc.text for doc in nodes)
 
 ########### FOR CHAT
 def respond(
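The store reuses the collection's existing "default" Atlas Vector Search index, and `SearchIndexModel` is imported but never used in the hunks shown. If that index still has to be created, it is typically done along these lines; the field path, dimension count, similarity metric, and the `MONGODB_URI` environment variable are assumptions, not taken from app.py, and must match the embeddings actually written to the collection (nomic-embed-text-v1.5 emits 768-dimensional vectors by default):

```python
# Hedged sketch: creating the "default" Atlas Vector Search index with pymongo.
import os
from pymongo.mongo_client import MongoClient
from pymongo.operations import SearchIndexModel

client = MongoClient(os.environ["MONGODB_URI"])     # connection string assumed to be in the env
collection = client["your_db"]["your_collection"]   # mirrors app.py's DB_NAME / COLLECTION_NAME

index_model = SearchIndexModel(
    definition={
        "fields": [
            {
                "type": "vector",
                "path": "embedding",    # assumed field written by MongoDBAtlasVectorSearch
                "numDimensions": 768,   # assumed nomic-embed-text-v1.5 default size
                "similarity": "cosine",
            }
        ]
    },
    name="default",
    type="vectorSearch",
)
collection.create_search_index(model=index_model)
```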
@@ -73,12 +87,21 @@ def respond(
 messages.append({"role": "assistant", "content": val[1]})
 #
 # build the query engine
-
-#
+####
 query_str = message
-
+question = query_str
+#
+query_embedding = embed_model.embed_query(query_str)
+vector_store_query = VectorStoreQuery(query_embedding, similarity_top_k=top_k)
+# Recover index
+query_results = vector_store.query(vector_store_query)
+docs = query_results.nodes
+docs_txt = format_docs(docs)
+rag_prompt_formatted = rag_prompt.format(context=docs_txt, question=question)
+#print(rag_prompt_formatted)
+generation = llm.invoke([HumanMessage(content=rag_prompt_formatted)])
 #
-return
+return generation.content
 #
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
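Inside respond(), the change embeds the incoming message, runs a top-k similarity query against the Atlas store, joins the retrieved node text via format_docs, fills rag_prompt, and returns the model's answer instead of the previous bare `return`. Below is a sketch of the same retrieve-then-generate flow as a standalone helper; it reuses the module-level embed_model, vector_store, rag_prompt, format_docs and llm from above, and turns top_k (used but not defined in the lines shown) into an explicit parameter:

```python
# Hedged sketch: the retrieve-then-generate pipeline respond() now runs,
# factored out for clarity (not part of the commit itself).
from llama_index.core.vector_stores.types import VectorStoreQuery
from langchain_core.messages import HumanMessage

def answer(question: str, top_k: int = 4) -> str:
    # 1. embed the question with the same model used at indexing time
    query_embedding = embed_model.embed_query(question)
    # 2. top-k similarity search against the Atlas vector index
    result = vector_store.query(VectorStoreQuery(query_embedding=query_embedding, similarity_top_k=top_k))
    # 3. concatenate retrieved chunks and fill the RAG prompt
    prompt = rag_prompt.format(context=format_docs(result.nodes), question=question)
    # 4. single-turn generation; the answer text is in .content
    return llm.invoke([HumanMessage(content=prompt)]).content
```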
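The trailing context lines reference the stock Gradio ChatInterface docstring, so the unchanged tail of app.py presumably wires respond() into the UI roughly as in the default Space template. A minimal sketch; the additional inputs and their default values are assumptions, not shown in the diff:

```python
# Hedged sketch of the Gradio wiring implied by the unchanged tail of app.py.
import gradio as gr

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=256, step=1, label="Max new tokens"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```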