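"""Helpers for the "gerar_documento" flow: compile the serializer settings into a plain
dict, split long texts into token-bounded chunks, and call Gemini for auxiliary contextual
summaries and a document title."""
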
from typing import Any, List, Tuple, Union
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage
from _utils.gerar_documento_utils.llm_calls import agemini_answer
from _utils.langchain_utils.Splitter_class import Splitter
from _utils.langchain_utils.LLM_class import LLM
from _utils.gerar_documento_utils.prompts import (
create_prompt_auxiliar_do_contextual_prompt,
prompt_para_gerar_titulo,
)
from _utils.models.gerar_documento import DocumentChunk
from gerar_documento.serializer import GerarDocumentoSerializerData
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")


def gerar_resposta_compilada(serializer: Union[GerarDocumentoSerializerData, Any]):
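    """Return a plain dict with the retrieval and generation settings carried by the
    serializer (prompt_gerar_documento is truncated to its first 200 characters)."""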
return {
"num_chunks_retrieval": serializer.num_chunks_retrieval,
"embedding_weight": serializer.embedding_weight,
"bm25_weight": serializer.bm25_weight,
"context_window": serializer.context_window,
"chunk_overlap": serializer.chunk_overlap,
"num_k_rerank": serializer.num_k_rerank,
"model_cohere_rerank": serializer.model_cohere_rerank,
"more_initial_chunks_for_reranking": serializer.more_initial_chunks_for_reranking,
"claude_context_model": serializer.claude_context_model,
"gpt_temperature": serializer.gpt_temperature,
"user_message": serializer.user_message,
"model": serializer.model,
"hf_embedding": serializer.hf_embedding,
"chunk_size": serializer.chunk_size,
"chunk_overlap": serializer.chunk_overlap,
# "prompt_auxiliar": serializer.prompt_auxiliar,
"prompt_gerar_documento": serializer.prompt_gerar_documento[0:200],
    }


# This function generates the response that will be used in each of the per-chunk requests
async def get_response_from_auxiliar_contextual_prompt(full_text_as_array: List[str]):
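    """Accumulate the text parts into chunks of at most 600,000 tokens, send each chunk to
    Gemini with the auxiliary contextual prompt, and return the concatenated responses."""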
llms = LLM()
responses = []
current_chunk = []
current_token_count = 0
chunk_counter = 1
for part in full_text_as_array:
part_tokens = len(encoding.encode(part))
# Check if adding this part would EXCEED the limit
if current_token_count + part_tokens > 600000:
# Process the accumulated chunk before it exceeds the limit
chunk_text = "".join(current_chunk)
print(
f"\nProcessing chunk {chunk_counter} with {current_token_count} tokens"
)
prompt = create_prompt_auxiliar_do_contextual_prompt(chunk_text)
response = await llms.google_gemini().ainvoke(
[HumanMessage(content=prompt)]
)
responses.append(response.content)
# Start new chunk with current part
current_chunk = [part]
current_token_count = part_tokens
chunk_counter += 1
else:
# Safe to add to current chunk
current_chunk.append(part)
current_token_count += part_tokens
# Process the final remaining chunk
if current_chunk:
chunk_text = "".join(current_chunk)
print(
f"\nProcessing final chunk {chunk_counter} with {current_token_count} tokens"
)
prompt = create_prompt_auxiliar_do_contextual_prompt(chunk_text)
response = await llms.google_gemini().ainvoke([HumanMessage(content=prompt)])
responses.append(response.content)
return "".join(responses)


def split_text_by_tokens(full_text: str):
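    """Encode the full text with cl100k_base, split the tokens into chunks of at most
    600,000 tokens, and decode each chunk back into text."""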
tokens = encoding.encode(full_text)
max_tokens = 600000
    # Split the tokens into chunks of at most max_tokens
token_chunks = [
tokens[i : i + max_tokens] for i in range(0, len(tokens), max_tokens)
]
    # Decode each token chunk back into text
text_chunks = [encoding.decode(chunk) for chunk in token_chunks]
return text_chunks


async def generate_document_title(resumo_para_gerar_titulo: str):
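    """Generate a document title from the given summary using gemini-2.0-flash-lite."""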
prompt = prompt_para_gerar_titulo(resumo_para_gerar_titulo)
response = await agemini_answer(prompt, "gemini-2.0-flash-lite")
return response
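

# --- Usage sketch (illustrative only, not part of the original module) ---
# A minimal example of how these helpers are expected to compose, assuming the surrounding
# project (the LLM class, the prompt builders and the Gemini credentials) is configured.
# The variable `sample_text` is hypothetical and stands in for the extracted document text.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        sample_text = "..."  # hypothetical placeholder for the full document text
        parts = split_text_by_tokens(sample_text)
        resumo = await get_response_from_auxiliar_contextual_prompt(parts)
        titulo = await generate_document_title(resumo)
        print(titulo)

    asyncio.run(_demo())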