k3ybladewielder committed on
Commit
7d20361
·
verified ·
1 Parent(s): 1768992

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -2
app.py CHANGED
@@ -7,10 +7,68 @@ from langchain_community.vectorstores import FAISS
7
  from langchain.chains import RetrievalQA
8
  from langchain_huggingface.embeddings import HuggingFaceEmbeddings
9
  from langchain.prompts import PromptTemplate
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # --- CONFIGURAÇÕES DE MODELOS ---
12
- # LLM_MODEL = 'google/gemma-3-4b-it'
13
- LLM_MODEL = 'google/gemma-3-1b-it'
14
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
15
 
16
  # --- CONFIGURAÇÃO DO TOKEN HF ---
 
7
  from langchain.chains import RetrievalQA
8
  from langchain_huggingface.embeddings import HuggingFaceEmbeddings
9
  from langchain.prompts import PromptTemplate
10
+ from langchain_community.document_loaders import WebBaseLoader
11
+ from langchain_text_splitters import CharacterTextSplitter
12
+ from langchain_community.vectorstores import FAISS
13
+ from langchain_huggingface import HuggingFaceEmbeddings
14
+ from dotenv import load_dotenv
15
+ import os
16
+ import logging
17
+ logging.getLogger("langchain.text_splitter").setLevel(logging.ERROR)
18
+ import warnings
19
+ warnings.filterwarnings("ignore")
20
+ from langchain_community.document_loaders import RecursiveUrlLoader
21
+ import yaml
22
+
23
+
24
+
25
+ # ------------ criando vs -----------------
26
+
27
+ ## knowledge base offline
28
+ url_list = [
29
+ "https://www.infinitepay.io",
30
+ "https://www.infinitepay.io/maquininha",
31
+ "https://www.infinitepay.io/maquininha-celular",
32
+ "https://www.infinitepay.io/tap-to-pay",
33
+ "https://www.infinitepay.io/pdv",
34
+ "https://www.infinitepay.io/receba-na-hora",
35
+ "https://www.infinitepay.io/gestao-de-cobranca",
36
+ "https://www.infinitepay.io/gestao-de-cobranca-2",
37
+ "https://www.infinitepay.io/link-de-pagamento",
38
+ "https://www.infinitepay.io/loja-online",
39
+ "https://www.infinitepay.io/boleto",
40
+ "https://www.infinitepay.io/conta-digital",
41
+ "https://www.infinitepay.io/conta-pj",
42
+ "https://www.infinitepay.io/pix",
43
+ "https://www.infinitepay.io/pix-parcelado",
44
+ "https://www.infinitepay.io/emprestimo",
45
+ "https://www.infinitepay.io/cartao",
46
+ "https://www.infinitepay.io/rendimento",
47
+ 'https://www.infinitepay.io/taxas',
48
+ 'https://www.cloudwalk.io/',
49
+ 'https://www.cloudwalk.io/#our-mission',
50
+ 'https://www.cloudwalk.io/#our-pillars',
51
+ 'https://www.cloudwalk.io/#our-products',
52
+ ]
53
+
54
+ # Carregue o conteúdo da página web como documentos LangChain
55
+ loader = WebBaseLoader(web_paths=url_list_unique)
56
+ docs = loader.load()
57
+ print(f"Total de páginas carregadas: {len(docs)}")
58
+
59
+ text_splitter = CharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
60
+ split_docs = text_splitter.split_documents(docs)
61
+ embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL,
62
+ cache_folder=cache_folder)
63
+ vector_store = FAISS.from_documents(split_docs, embeddings)
64
+ # vs_base = "../vector_store/vs_base"
65
+ os.makedirs(VS_BASE, exist_ok=True)
66
+ vector_store.save_local(VS_BASE)
67
+ print(f"vs_base salva em {VS_BASE}")
68
 
69
  # --- CONFIGURAÇÕES DE MODELOS ---
70
+ LLM_MODEL = 'google/gemma-3-4b-it'
71
+ # LLM_MODEL = 'google/gemma-3-1b-it'
72
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
73
 
74
  # --- CONFIGURAÇÃO DO TOKEN HF ---