luanpoppe commited on
Commit
d32424b
·
1 Parent(s): eebeb78

feat: adicionando e melhorando utilitários do langchain

Browse files
_utils/langchain_utils/Chain_class.py CHANGED
@@ -1,11 +1,23 @@
 
 
1
  class Chain:
2
- def __init__(self, prompt, model):
3
- self.prompt = prompt
4
- self.model = model
5
 
6
- def create_prompt_model_chain(self):
7
- return self.prompt | self.model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- def invoke_prompt_model_chain(self, invoke_params):
10
- chain = self.create_prompt_model_chain()
11
- return chain.invoke(invoke_params)
 
1
+ from setup.easy_imports import RunnablePassthrough, create_retrieval_chain
2
+
3
class Chain:
    """Thin helpers for composing LangChain runnables into chains."""

    def create_prompt_model_chain(self, prompt, model):
        """Pipe a prompt template into a chat model (``prompt | model``)."""
        return prompt | model

    def create_prompt_model_retriever_chain(self, prompt, model, retriever):
        """Build a retrieval chain whose retrieved documents feed ``prompt | model``."""
        chain = prompt | model
        return create_retrieval_chain(retriever, chain)

    def invoke_retrieval_chain(self, chain, busca_no_vetor):
        """Invoke *chain* with ``{"input": busca_no_vetor}`` and wrap the result.

        Returns an object with:
            final_answer -- ``resposta["answer"].content`` (the model's text)
            complete_obj -- the untouched response dict
        """
        resposta = chain.invoke({"input": busca_no_vetor})
        return Resposta(resposta)


class Resposta:
    """Lightweight view over a retrieval-chain response.

    Hoisted out of ``invoke_retrieval_chain`` so the class object is created
    once at import time instead of being re-defined on every invocation.
    """

    def __init__(self, resposta):
        self.final_answer = resposta["answer"].content
        self.complete_obj = resposta


chain = Chain()
 
 
_utils/langchain_utils/Document_class.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setup.easy_imports import PyPDFLoader
2
+
3
+
4
class Document_Class:
    """Convenience wrappers around PyPDFLoader for reading PDF files."""

    def load_pdf(self, pdf, ocr=False):
        """Load *pdf* into a list of Documents; ocr=True also extracts text from images."""
        return PyPDFLoader(pdf, extract_images=ocr).load()

    def load_and_split_pdf(self, pdf, ocr=False):
        """Load *pdf* and split it with the loader's default splitter."""
        return PyPDFLoader(pdf, extract_images=ocr).load_and_split()

    def get_pdf_text(self, pdf, ocr=False):
        """Return the concatenated ``page_content`` of every page in *pdf*."""
        document = self.load_pdf(pdf, ocr)
        # str.join is linear; repeated str += in a loop is quadratic worst-case.
        return "".join(page.page_content for page in document)


document = Document_Class()
_utils/langchain_utils/LLM_class.py CHANGED
@@ -6,14 +6,15 @@ import os
6
 
7
  deepseek_api_key = cast(str, os.environ.get("DEEPSEEKK_API_KEY"))
8
  google_api_key = cast(str, os.environ.get("GOOGLE_API_KEY_PEIXE"))
 
9
 
10
 
11
  class LLM:
12
  def __init__(self):
13
  pass
14
 
15
- # def create_GPT_model(self, model=default_model):
16
- # return ChatOpen()
17
 
18
  def deepseek(self, model="deepseek-chat"):
19
  return ChatOpenAI(
 
6
 
7
  deepseek_api_key = cast(str, os.environ.get("DEEPSEEKK_API_KEY"))
8
  google_api_key = cast(str, os.environ.get("GOOGLE_API_KEY_PEIXE"))
9
+ open_ai_token = cast(str, os.environ.get("OPENAI_API_KEY"))
10
 
11
 
12
  class LLM:
13
  def __init__(self):
14
  pass
15
 
16
+ def open_ai(self, model="gpt-4o-mini"):
17
+ return ChatOpenAI(api_key=SecretStr(open_ai_token), model=model)
18
 
19
  def deepseek(self, model="deepseek-chat"):
20
  return ChatOpenAI(
_utils/langchain_utils/Prompt_class.py CHANGED
@@ -11,4 +11,10 @@ class Prompt:
11
  )
12
  return prompt_template
13
 
 
 
 
 
 
 
14
  prompt = Prompt()
 
11
  )
12
  return prompt_template
13
 
14
+ def create_and_invoke_prompt(self, user_prompt, system_prompt="", dynamic_dict={}):
15
+ return ChatPromptTemplate.from_messages(
16
+ [("system", system_prompt), ("user", user_prompt)]
17
+ ).invoke(dynamic_dict)
18
+
19
+
20
  prompt = Prompt()
_utils/langchain_utils/Splitter_class.py CHANGED
@@ -170,3 +170,31 @@ class Splitter:
170
  char_count += len(text)
171
 
172
  return chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  char_count += len(text)
171
 
172
  return chunks
173
+
174
+
175
class Splitter_Simple:
    """Wraps RecursiveCharacterTextSplitter for splitting PDFs and raw text."""

    def __init__(
        self,
        chunk_size=1000,
        chunk_overlap=400,
    ):
        # 400-char overlap on 1000-char chunks gives heavy overlap between
        # consecutive chunks; defaults preserved from the original version.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )

    async def load_and_split_document(self, pdf_path: str):
        """Load PDF and split into chunks with metadata"""
        # NOTE(review): nothing here awaits — async is kept only so existing
        # awaiting callers keep working; confirm before making it sync.
        print("\nCOMEÇANDO LEITURA DO PDF")
        pages = PyPDFLoader(pdf_path).load_and_split(self.text_splitter)
        print("\nTERMINADO LEITURA DO PDF")

        return pages

    def load_and_split_text(self, text: str) -> List[Document]:
        """Split *text* with the configured splitter and wrap each chunk in a Document."""
        # Comprehension instead of the manual append loop (PERF401).
        chunks = self.text_splitter.split_text(text)
        return [Document(page_content=chunk) for chunk in chunks]
_utils/langchain_utils/embeddings.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from pydantic import SecretStr
from setup.easy_imports import OpenAIEmbeddings
from setup.tokens import openai_api_key


class EmbeddingClass:
    """Factories for embedding backends."""

    def open_ai(self):
        """Return an OpenAIEmbeddings client authenticated with OPENAI_API_KEY.

        SecretStr is the concrete secret type langchain-openai expects for
        api_key, and matches how LLM_class wraps its tokens — the raw generic
        pydantic.Secret needed a type: ignore to pass.
        """
        return OpenAIEmbeddings(api_key=SecretStr(openai_api_key))


embedding = EmbeddingClass()
_utils/langchain_utils/retriever.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from _utils.langchain_utils.vector_stores import vector_store


class Retriever:
    """Builds retrievers on top of the shared Chroma vector store."""

    def chroma_retriever(
        self, lista_de_documents, search_type="similarity", search_kwargs=None
    ):
        """Index *lista_de_documents* in Chroma and return it as a retriever.

        search_kwargs defaults to {"k": 1} (top-1 match), as before.
        """
        # None-sentinel instead of a mutable default dict shared across calls.
        if search_kwargs is None:
            search_kwargs = {"k": 1}
        retriever = vector_store.chroma(lista_de_documents).as_retriever(
            search_type=search_type,
            search_kwargs=search_kwargs,
        )
        return retriever


retriever = Retriever()
_utils/langchain_utils/vector_stores.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
from setup.easy_imports import Chroma
from _utils.langchain_utils.embeddings import embedding


class VectorStoreClass:
    """Factory for vector stores backed by the shared embedding client."""

    def chroma(self, lista_de_documents):
        """Index *lista_de_documents* into a new Chroma store using OpenAI embeddings."""
        embeddings_client = embedding.open_ai()
        return Chroma.from_documents(lista_de_documents, embeddings_client)


vector_store = VectorStoreClass()
setup/easy_imports.py CHANGED
@@ -11,14 +11,17 @@ from langchain_huggingface import HuggingFaceEmbeddings
11
 
12
  # from langchain_community.embeddings import HuggingFaceEmbeddings
13
  from langchain.prompts import PromptTemplate
 
14
  from langchain_core.prompts import ChatPromptTemplate
15
  from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
16
  from langchain_community.vectorstores import Chroma
17
  from langchain_google_genai import ChatGoogleGenerativeAI
18
 
19
  # from langchain_community.chat_models import ChatOpenAI
20
- from langchain_openai import ChatOpenAI
21
  from langchain.schema import Document
22
  from langchain.chains import create_extraction_chain
 
 
23
 
24
  from rank_bm25 import BM25Okapi
 
11
 
12
  # from langchain_community.embeddings import HuggingFaceEmbeddings
13
  from langchain.prompts import PromptTemplate
14
+ from langchain_core.runnables import RunnablePassthrough
15
  from langchain_core.prompts import ChatPromptTemplate
16
  from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
17
  from langchain_community.vectorstores import Chroma
18
  from langchain_google_genai import ChatGoogleGenerativeAI
19
 
20
  # from langchain_community.chat_models import ChatOpenAI
21
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
22
  from langchain.schema import Document
23
  from langchain.chains import create_extraction_chain
24
+ from langchain.chains.retrieval import create_retrieval_chain
25
+ from langchain.chains.combine_documents import create_stuff_documents_chain
26
 
27
  from rank_bm25 import BM25Okapi
setup/tokens.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import cast

# API tokens read from the environment once, at import time.
# NOTE(review): entries without a "" default may actually be None at runtime
# even though they are cast to str — cast() only silences the type checker.
openai_api_key = cast(str, os.environ.get("OPENAI_API_KEY", ""))
claude_api_key = cast(str, os.environ.get("CLAUDE_API_KEY"))
langchain_api_key = cast(str, os.environ.get("LANGCHAIN_API_KEY"))
hugging_face_api_key = cast(str, os.environ.get("HUGGINGFACEHUB_API_TOKEN"))
bubble_token = cast(str, os.environ.get("BUBBLE_TOKEN"))
cohere_api_key = cast(str, os.environ.get("COHERE_API_KEY", ""))
# NOTE(review): "DEEPSEEKK" (double K) looks like a typo, but LLM_class.py reads
# the same variable name — rename both together, or keep as-is.
deepseek_api_key = cast(str, os.environ.get("DEEPSEEKK_API_KEY"))
google_api_key = cast(str, os.environ.get("GOOGLE_API_KEY_PEIXE"))