# DRF serializers and plain dataclass payloads for the document-generation endpoints.
from dataclasses import dataclass, field | |
from typing import List, Optional | |
from rest_framework import serializers | |
from _utils.gerar_relatorio_modelo_usuario.prompts import ( | |
prompt_gerar_documento, | |
prompt_auxiliar_padrao, | |
) | |
from setup.environment import default_model | |
from django.core.files.uploadedfile import UploadedFile | |
# Default user question used when the caller does not supply one.
user_message = "What are the main points of this document?"

# Default system prompt; "{context}" is filled with the retrieved document chunks.
prompt_template = """
Based on the following context, provide multiple key points from the document.
For each point, create a new paragraph.
Each paragraph should be a complete, self-contained insight.
Context: {context}
Key points:
"""
class GerarDocumentoInitialSerializer(serializers.Serializer):
    """Base payload for document-generation requests.

    Accepts the uploaded files plus prompt/model/chunking options; every
    option is optional and falls back to the module-level defaults.
    """

    files = serializers.ListField(child=serializers.FileField(), required=True)
    system_prompt = serializers.CharField(required=False, default=prompt_template)
    user_message = serializers.CharField(required=False, default="")
    model = serializers.CharField(required=False, default=default_model)
    # Sentence-transformers embedding model name.
    hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
    chunk_size = serializers.IntegerField(required=False, default=3500)
    chunk_overlap = serializers.IntegerField(required=False, default=800)
@dataclass
class GerarDocumentoInitialSerializerData:
    """Plain-data counterpart of ``GerarDocumentoInitialSerializer``.

    Fix: the ``@dataclass`` decorator was missing, so the class had only
    annotations and no generated ``__init__`` — keyword construction
    (``Cls(**validated_data)``, as done by ``get_obj`` in subclasses)
    raised ``TypeError``.
    """

    files: List[dict]
    system_prompt: str = prompt_template
    user_message: str = ""
    model: str = default_model
    hf_embedding: str = "all-MiniLM-L6-v2"
    chunk_size: int = 3500
    chunk_overlap: int = 800
class FileInfoSerializer(serializers.Serializer):
    """Metadata for a file that is already stored remotely: its unique id,
    file type, and download URL."""

    unique_id = serializers.CharField(max_length=255)
    tipo_arquivo = serializers.CharField(max_length=255)
    link_arquivo = serializers.URLField()
@dataclass
class FileInfoSerializerData:
    """Plain-data counterpart of ``FileInfoSerializer``.

    Fix: the ``@dataclass`` decorator was missing — without it the class
    carried only annotations and had no generated ``__init__``/``__eq__``,
    so it could not be instantiated from serializer data.
    """

    unique_id: str
    tipo_arquivo: str
    link_arquivo: str
class GerarDocumentoSerializer(GerarDocumentoInitialSerializer):
    """Full payload for generating a document from files already stored
    remotely (referenced via ``FileInfoSerializer`` entries).

    Extends the initial serializer with hybrid-retrieval, reranking and
    model-selection knobs; ``system_prompt`` is dropped in favour of
    ``prompt_gerar_documento``.

    Fix: removed commented-out dead code (the disabled ``prompt_auxiliar``
    field).
    """

    system_prompt = None  # Removes the inherited field from this serializer.
    files = serializers.ListField(child=FileInfoSerializer(), required=True)
    bubble_editor_version = serializers.CharField(
        required=False, default="version-test"
    )  # Value used inside the URL of the request to Bubble.
    prompt_gerar_documento = serializers.CharField(
        required=False, default=prompt_gerar_documento
    )
    user_message = serializers.CharField(required=False, default=user_message)
    # Hybrid retrieval: embedding similarity fused with BM25 by weight.
    num_chunks_retrieval = serializers.IntegerField(default=20)
    embedding_weight = serializers.FloatField(default=0.5)
    bm25_weight = serializers.FloatField(default=0.5)
    context_window = serializers.IntegerField(default=3)
    chunk_overlap = serializers.IntegerField(default=800)
    # Reranking stage (Cohere).
    num_k_rerank = serializers.IntegerField(default=20)
    model_cohere_rerank = serializers.CharField(
        required=False, default="rerank-english-v2.0"
    )
    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
    claude_context_model = serializers.CharField(
        required=False, default="claude-3-haiku-20240307"
    )
    gpt_temperature = serializers.FloatField(default=0)
    id_modelo_do_usuario = serializers.IntegerField(required=False)
    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
    llm_ultimas_requests = serializers.CharField(
        required=False, default="gemini-2.0-flash"
    )
    doc_id = serializers.CharField(required=True)
    form_response_id = serializers.CharField(required=True)
    version = serializers.CharField(required=True)

    def get_obj(self):
        """Wrap ``validated_data`` in a ``GerarDocumentoSerializerData``."""
        return GerarDocumentoSerializerData(**self.validated_data)  # type: ignore
@dataclass
class GerarDocumentoSerializerData(GerarDocumentoInitialSerializerData):
    """Plain-data counterpart of ``GerarDocumentoSerializer``.

    Fix: the ``@dataclass`` decorator was missing — ``get_obj`` constructs
    this class with keyword arguments, which requires the generated
    ``__init__``.
    """

    files: List[FileInfoSerializerData]
    bubble_editor_version: str = "version-test"
    prompt_gerar_documento: str = ""
    user_message: str = ""
    num_chunks_retrieval: int = 20
    embedding_weight: float = 0.5
    bm25_weight: float = 0.5
    context_window: int = 3
    chunk_overlap: int = 800
    num_k_rerank: int = 20
    model_cohere_rerank: str = "rerank-english-v2.0"
    more_initial_chunks_for_reranking: int = 100
    claude_context_model: str = "claude-3-haiku-20240307"
    gpt_temperature: float = 0.0
    id_modelo_do_usuario: Optional[int] = None
    should_have_contextual_chunks: bool = False
    should_use_llama_parse: bool = False
    llm_ultimas_requests: str = "gemini-2.0-flash"
    doc_id: str = ""
    form_response_id: str = ""
    version: str = ""
class GerarDocumentoComPDFProprioSerializer(GerarDocumentoInitialSerializer):
    """Payload for generating a document from the user's own uploaded PDFs.

    Keeps the inherited ``files`` upload field (unlike
    ``GerarDocumentoSerializer``) and requires no Bubble document ids.

    Fix: removed commented-out dead code (the disabled ``prompt_auxiliar``
    field).
    """

    system_prompt = None  # Removes the inherited field from this serializer.
    prompt_gerar_documento = serializers.CharField(
        required=False, default=prompt_gerar_documento
    )
    user_message = serializers.CharField(required=False, default=user_message)
    # Hybrid retrieval: embedding similarity fused with BM25 by weight.
    num_chunks_retrieval = serializers.IntegerField(default=20)
    embedding_weight = serializers.FloatField(default=0.5)
    bm25_weight = serializers.FloatField(default=0.5)
    context_window = serializers.IntegerField(default=3)
    chunk_overlap = serializers.IntegerField(default=800)
    # Reranking stage (Cohere).
    num_k_rerank = serializers.IntegerField(default=20)
    model_cohere_rerank = serializers.CharField(
        required=False, default="rerank-english-v2.0"
    )
    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
    claude_context_model = serializers.CharField(
        required=False, default="claude-3-haiku-20240307"
    )
    gpt_temperature = serializers.FloatField(default=0)
    id_modelo_do_usuario = serializers.IntegerField(required=False, default=11)
    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
    llm_ultimas_requests = serializers.CharField(required=False, default="gpt-4o-mini")

    def get_obj(self):
        """Wrap ``validated_data`` in a data object.

        NOTE(review): this returns ``GerarDocumentoSerializerData`` rather
        than ``GerarDocumentoComPDFProprioSerializerData`` (which is
        otherwise unused) — presumably the latter was intended; confirm
        with callers before changing.
        """
        return GerarDocumentoSerializerData(**self.validated_data)  # type: ignore
@dataclass
class GerarDocumentoComPDFProprioSerializerData(GerarDocumentoInitialSerializerData):
    """Plain-data counterpart of ``GerarDocumentoComPDFProprioSerializer``.

    Fixes: the ``@dataclass`` decorator was missing (outside a dataclass the
    ``field(...)`` assignments were inert class attributes, and the class had
    no generated ``__init__``); the redundant ``field(default=X)`` wrappers
    are simplified to the equivalent plain ``= X`` defaults.
    """

    prompt_gerar_documento: Optional[str] = None
    user_message: Optional[str] = None
    num_chunks_retrieval: int = 20
    embedding_weight: float = 0.5
    bm25_weight: float = 0.5
    context_window: int = 3
    chunk_overlap: int = 800
    num_k_rerank: int = 20
    model_cohere_rerank: str = "rerank-english-v2.0"
    more_initial_chunks_for_reranking: int = 100
    claude_context_model: str = "claude-3-haiku-20240307"
    gpt_temperature: float = 0.0
    id_modelo_do_usuario: int = 11
    should_have_contextual_chunks: bool = False
    should_use_llama_parse: bool = False
    llm_ultimas_requests: str = "gpt-4o-mini"
class GerarEmentaSerializer(serializers.Serializer):
    """Payload for generating an "ementa" from files already stored remotely,
    identified by the Bubble document/form-response/version triple."""

    files = serializers.ListField(child=FileInfoSerializer(), required=True)
    user_message = serializers.CharField(required=False, default="")
    chunk_size = serializers.IntegerField(required=False, default=3500)
    chunk_overlap = serializers.IntegerField(required=False, default=800)
    bubble_editor_version = serializers.CharField(
        required=False, default="version-test"
    )  # Value used inside the URL of the request to Bubble.
    doc_id = serializers.CharField(required=True)
    form_response_id = serializers.CharField(required=True)
    version = serializers.CharField(required=True)