vermen committed (verified)
Commit 094f2eb · Parent: c872003

Update app.py

Files changed (1): app.py (+59 -36)
app.py CHANGED
@@ -1,44 +1,29 @@
+# version for gradio
 import gradio as gr
 from llama_index.llms.llama_cpp import LlamaCPP
-from llama_index.core import VectorStoreIndex, StorageContext
 from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
 import pymongo
 from pymongo.mongo_client import MongoClient
 from pymongo.operations import SearchIndexModel
 from llama_index.core import VectorStoreIndex, StorageContext
+
+from llama_index.core.vector_stores.types import VectorStoreQuery
+
+from langchain_nomic.embeddings import NomicEmbeddings
 import os
 ###### load LLM
-model_url = "https://huggingface.co/georgesung/llama3_8b_chat_uncensored/resolve/main/llama3_8b_chat_uncensored_q4_0.gguf"
-
-llm = LlamaCPP(
-    # You can pass in the URL to a GGML model to download it automatically
-    model_url=model_url,
-    # optionally, you can set the path to a pre-downloaded model instead of model_url
-    model_path=None,
-    temperature=0.01,
-    max_new_tokens=256,  # could be larger but requires more time
-    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
-    context_window=3900,
-    # kwargs to pass to __call__()
-    generate_kwargs={},
-    # kwargs to pass to __init__()
-    # set to at least 1 to use GPU
-    model_kwargs={"n_gpu_layers": 1},
-    verbose=True,
-)
-# load embedding model
-# sentence transformers
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+os.system("ollama pull llama3.2:3b-instruct-fp16")
+# LLM
+from langchain_ollama import ChatOllama
 
-from llama_index.core.node_parser import SentenceSplitter
-from llama_index.core import Settings
+local_llm = "llama3.2:3b-instruct-fp16"
+llm = ChatOllama(model=local_llm, temperature=0)
+llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")
 
-embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")
-Settings.llm = llm
-Settings.embed_model = embed_model
-Settings.node_parser = SentenceSplitter(chunk_size=1024)
-Settings.num_output = 256
-Settings.context_window = 3900
+# load embedding model
+# sentence transformers
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+embed_model = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")
 
 # Load vector database
 
@@ -52,10 +37,39 @@ collection = mongo_client[DB_NAME][COLLECTION_NAME]
 #
 
 vector_store = MongoDBAtlasVectorSearch(mongo_client, db_name=DB_NAME, collection_name=COLLECTION_NAME, vector_index_name="default")
+#
+# print(query_results.nodes[0].text)
+
+# COMPONENT
+### Router
+import json
+from langchain_core.messages import HumanMessage, SystemMessage
+
+# Prompt
+### Generate
 
-# Recover index
-index = VectorStoreIndex.from_vector_store(vector_store)
+# Prompt
+rag_prompt = """You are a multilingual assistant for question-answering tasks.
+
+Here is the context to use to answer the question:
+
+{context}
+
+Think carefully about the context above.
+
+Now, review the user's question:
+
+{question}
+
+Provide an answer to this question using only the context above.
+
+Keep your answer formal and concise.
+
+Answer:"""
+
+# Post-processing
+def format_docs(nodes):
+    return "\n\n".join(doc.text for doc in nodes)
 
 ########### FOR CHAT
 def respond(
@@ -73,12 +87,21 @@ def respond(
             messages.append({"role": "assistant", "content": val[1]})
     #
     # build the query engine
-    query_engine = index.as_query_engine(similarity_top_k=top_k)
-    #
+    ####
     query_str = message
-    response = query_engine.query(query_str)
+    question = query_str
+    #
+    query_embedding = embed_model.embed_query(query_str)
+    vector_store_query = VectorStoreQuery(query_embedding, similarity_top_k=top_k)
+    # Recover index
+    query_results = vector_store.query(vector_store_query)
+    docs = query_results.nodes
+    docs_txt = format_docs(docs)
+    rag_prompt_formatted = rag_prompt.format(context=docs_txt, question=question)
+    # print(rag_prompt_formatted)
+    generation = llm.invoke([HumanMessage(content=rag_prompt_formatted)])
     #
-    return str(response)
+    return generation.content
     #
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

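Note on the LLM swap: the local GGUF checkpoint served through LlamaCPP is replaced by Ollama via langchain_ollama. The os.system("ollama pull ...") call only fetches the model; it assumes an Ollama server is already running in the Space. A minimal sketch of the full runtime setup, assuming the ollama binary is on PATH (the background serve and the crude readiness wait are not part of this commit):

import subprocess
import time

from langchain_ollama import ChatOllama

# Start the Ollama server in the background, then fetch the model it will serve.
subprocess.Popen(["ollama", "serve"])
time.sleep(5)  # crude wait for the server to come up
subprocess.run(["ollama", "pull", "llama3.2:3b-instruct-fp16"], check=True)

# temperature=0 mirrors the deterministic setting used in app.py
llm = ChatOllama(model="llama3.2:3b-instruct-fp16", temperature=0)
print(llm.invoke("Reply with the single word: ready").content)  # smoke test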
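The embedding model changes too, from BAAI/bge-small-en (384-dimensional) to a locally run nomic-embed-text-v1.5 (768-dimensional by default), so vectors already stored in the Atlas collection by the old model will no longer match what the new code queries with. A quick sanity check, assuming local Nomic inference is available:

from langchain_nomic.embeddings import NomicEmbeddings

embed_model = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")
vec = embed_model.embed_query("dimension check")
# Expected 768; this must equal numDimensions in the Atlas vector index.
print(len(vec))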
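SearchIndexModel is imported but never used: the code relies on an Atlas Vector Search index named "default" already existing on the collection. A hypothetical one-time setup for that index is sketched below; the "embedding" field path is MongoDBAtlasVectorSearch's default, 768 matches the Nomic model, and DB_NAME, COLLECTION_NAME, and the connection URI stand in for values elided from this diff:

import os

from pymongo.mongo_client import MongoClient
from pymongo.operations import SearchIndexModel

DB_NAME = "my_db"                  # placeholder
COLLECTION_NAME = "my_collection"  # placeholder
mongo_client = MongoClient(os.environ["MONGODB_URI"])  # URI not shown in the diff
collection = mongo_client[DB_NAME][COLLECTION_NAME]

search_index = SearchIndexModel(
    definition={
        "fields": [
            {
                "type": "vector",
                "path": "embedding",   # default field used by MongoDBAtlasVectorSearch
                "numDimensions": 768,  # nomic-embed-text-v1.5 output size
                "similarity": "cosine",
            }
        ]
    },
    name="default",
    type="vectorSearch",
)
collection.create_search_index(model=search_index)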
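In respond(), the old high-level query_engine is replaced by an explicit pipeline: embed the question, run a VectorStoreQuery against Atlas, join the retrieved node texts into rag_prompt, and invoke the Ollama model. The same flow as a standalone sketch, reusing the module-level objects app.py defines above (the question string is only an example):

# Assumes app.py's module scope: embed_model, vector_store, rag_prompt,
# format_docs, llm, VectorStoreQuery, and HumanMessage are all defined there.
question = "What is this Space about?"  # example input

query_embedding = embed_model.embed_query(question)
vector_store_query = VectorStoreQuery(query_embedding, similarity_top_k=3)
query_results = vector_store.query(vector_store_query)  # VectorStoreQueryResult

docs_txt = format_docs(query_results.nodes)  # "\n\n"-joined node texts
rag_prompt_formatted = rag_prompt.format(context=docs_txt, question=question)
generation = llm.invoke([HumanMessage(content=rag_prompt_formatted)])
print(generation.content)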
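The closing docstring points at the gr.ChatInterface documentation. The hunk above shows respond() consuming the chat message, the pairwise history, and a top_k retrieval parameter, so the unchanged (not shown) wiring presumably looks something like the sketch below; the slider bounds and default are assumptions:

# Assumes respond(message, history, top_k) from app.py; additional_inputs are
# passed to respond() positionally after message and history.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top_k"),
    ],
)

if __name__ == "__main__":
    demo.launch()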