Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,10 +7,68 @@ from langchain_community.vectorstores import FAISS
|
|
7 |
from langchain.chains import RetrievalQA
|
8 |
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
|
9 |
from langchain.prompts import PromptTemplate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
# --- CONFIGURAÇÕES DE MODELOS ---
|
12 |
-
|
13 |
-
LLM_MODEL = 'google/gemma-3-1b-it'
|
14 |
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
15 |
|
16 |
# --- CONFIGURAÇÃO DO TOKEN HF ---
|
|
|
7 |
from langchain.chains import RetrievalQA
|
8 |
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
|
9 |
from langchain.prompts import PromptTemplate
|
10 |
+
from langchain_community.document_loaders import WebBaseLoader
|
11 |
+
from langchain_text_splitters import CharacterTextSplitter
|
12 |
+
from langchain_community.vectorstores import FAISS
|
13 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
14 |
+
from dotenv import load_dotenv
|
15 |
+
import os
|
16 |
+
import logging
|
17 |
+
logging.getLogger("langchain.text_splitter").setLevel(logging.ERROR)
|
18 |
+
import warnings
|
19 |
+
warnings.filterwarnings("ignore")
|
20 |
+
from langchain_community.document_loaders import RecursiveUrlLoader
|
21 |
+
import yaml
|
22 |
+
|
23 |
+
|
24 |
+
|
25 |
+
# ------------ building the vector store (vs) -----------------
|
26 |
+
|
27 |
+
## knowledge base offline
|
28 |
+
url_list = [
|
29 |
+
"https://www.infinitepay.io",
|
30 |
+
"https://www.infinitepay.io/maquininha",
|
31 |
+
"https://www.infinitepay.io/maquininha-celular",
|
32 |
+
"https://www.infinitepay.io/tap-to-pay",
|
33 |
+
"https://www.infinitepay.io/pdv",
|
34 |
+
"https://www.infinitepay.io/receba-na-hora",
|
35 |
+
"https://www.infinitepay.io/gestao-de-cobranca",
|
36 |
+
"https://www.infinitepay.io/gestao-de-cobranca-2",
|
37 |
+
"https://www.infinitepay.io/link-de-pagamento",
|
38 |
+
"https://www.infinitepay.io/loja-online",
|
39 |
+
"https://www.infinitepay.io/boleto",
|
40 |
+
"https://www.infinitepay.io/conta-digital",
|
41 |
+
"https://www.infinitepay.io/conta-pj",
|
42 |
+
"https://www.infinitepay.io/pix",
|
43 |
+
"https://www.infinitepay.io/pix-parcelado",
|
44 |
+
"https://www.infinitepay.io/emprestimo",
|
45 |
+
"https://www.infinitepay.io/cartao",
|
46 |
+
"https://www.infinitepay.io/rendimento",
|
47 |
+
'https://www.infinitepay.io/taxas',
|
48 |
+
'https://www.cloudwalk.io/',
|
49 |
+
'https://www.cloudwalk.io/#our-mission',
|
50 |
+
'https://www.cloudwalk.io/#our-pillars',
|
51 |
+
'https://www.cloudwalk.io/#our-products',
|
52 |
+
]
|
53 |
+
|
54 |
+
# Load the content of every URL in the list as LangChain documents
|
55 |
+
# `url_list_unique` is not assigned anywhere in the visible script, which
# makes the loader call fail with NameError. Build it from `url_list` here:
# dict.fromkeys drops repeated URLs while preserving their original order.
url_list_unique = list(dict.fromkeys(url_list))
loader = WebBaseLoader(web_paths=url_list_unique)
|
56 |
+
docs = loader.load()
|
57 |
+
print(f"Total de páginas carregadas: {len(docs)}")
|
58 |
+
|
59 |
+
text_splitter = CharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
|
60 |
+
split_docs = text_splitter.split_documents(docs)
|
61 |
+
# EMBEDDING_MODEL is only assigned in the model-configuration section further
# down the script, i.e. after this statement runs, so it must be bound here
# to avoid a NameError. Same value as the later assignment; keep them in sync
# (or move the configuration section above this point).
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# NOTE(review): `cache_folder` is not defined in the visible part of the
# script — confirm it is assigned before this line.
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    cache_folder=cache_folder,
)
|
63 |
+
vector_store = FAISS.from_documents(split_docs, embeddings)
|
64 |
+
# vs_base = "../vector_store/vs_base"
|
65 |
+
os.makedirs(VS_BASE, exist_ok=True)
|
66 |
+
vector_store.save_local(VS_BASE)
|
67 |
+
print(f"vs_base salva em {VS_BASE}")
|
68 |
|
69 |
# --- CONFIGURAÇÕES DE MODELOS ---
|
70 |
+
LLM_MODEL = 'google/gemma-3-4b-it'
|
71 |
+
# LLM_MODEL = 'google/gemma-3-1b-it'
|
72 |
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
73 |
|
74 |
# --- CONFIGURAÇÃO DO TOKEN HF ---
|