"""Serializers and dataclass DTOs for the document-generation endpoints.

Each ``serializers.Serializer`` subclass validates an incoming request
payload; the matching ``@dataclass`` mirrors its validated fields so the
rest of the pipeline can work with plain typed objects instead of dicts.
"""

from dataclasses import dataclass
from typing import List, Optional

from rest_framework import serializers

from _utils.gerar_relatorio_modelo_usuario.prompts import (
    prompt_gerar_documento,
    prompt_auxiliar_padrao,
)
from setup.environment import default_model
from django.core.files.uploadedfile import UploadedFile

# Default user question injected when the caller does not supply one.
user_message = "What are the main points of this document?"

# Default system prompt; ``{context}`` is filled in by the retrieval step.
prompt_template = """
Based on the following context, provide multiple key points from the document.
For each point, create a new paragraph.
Each paragraph should be a complete, self-contained insight.

Context:
{context}

Key points:
"""


class GerarDocumentoInitialSerializer(serializers.Serializer):
    """Base payload for document generation: uploaded files plus chunking/LLM knobs."""

    files = serializers.ListField(child=serializers.FileField(), required=True)
    system_prompt = serializers.CharField(required=False, default=prompt_template)
    user_message = serializers.CharField(required=False, default="")
    model = serializers.CharField(required=False, default=default_model)
    hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
    chunk_size = serializers.IntegerField(required=False, default=3500)
    chunk_overlap = serializers.IntegerField(required=False, default=800)


@dataclass
class GerarDocumentoInitialSerializerData:
    """Typed mirror of ``GerarDocumentoInitialSerializer.validated_data``."""

    files: List[dict]
    system_prompt: str = prompt_template
    user_message: str = ""
    model: str = default_model
    hf_embedding: str = "all-MiniLM-L6-v2"
    chunk_size: int = 3500
    chunk_overlap: int = 800


class FileInfoSerializer(serializers.Serializer):
    """Reference to an already-stored file (id, type and download URL)."""

    unique_id = serializers.CharField(max_length=255)
    tipo_arquivo = serializers.CharField(max_length=255)
    link_arquivo = serializers.URLField()


@dataclass
class FileInfoSerializerData:
    """Typed mirror of ``FileInfoSerializer.validated_data``."""

    unique_id: str
    tipo_arquivo: str
    link_arquivo: str


class GerarDocumentoSerializer(GerarDocumentoInitialSerializer):
    """Full document-generation payload using stored-file references.

    Overrides the base ``files`` field to take ``FileInfoSerializer`` items
    instead of raw uploads, and adds retrieval/rerank/LLM tuning parameters.
    """

    # Drop the inherited field: this endpoint does not accept a system prompt.
    system_prompt = None
    files = serializers.ListField(child=FileInfoSerializer(), required=True)
    # Value interpolated into the Bubble request URL.
    bubble_editor_version = serializers.CharField(
        required=False, default="version-test"
    )
    prompt_gerar_documento = serializers.CharField(
        # RHS still resolves to the imported module-level prompt here.
        required=False, default=prompt_gerar_documento
    )
    user_message = serializers.CharField(required=False, default=user_message)
    num_chunks_retrieval = serializers.IntegerField(default=20)
    embedding_weight = serializers.FloatField(default=0.5)
    bm25_weight = serializers.FloatField(default=0.5)
    context_window = serializers.IntegerField(default=3)
    chunk_overlap = serializers.IntegerField(default=800)
    num_k_rerank = serializers.IntegerField(default=20)
    model_cohere_rerank = serializers.CharField(
        required=False, default="rerank-english-v2.0"
    )
    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
    claude_context_model = serializers.CharField(
        required=False, default="claude-3-haiku-20240307"
    )
    gpt_temperature = serializers.FloatField(default=0)
    id_modelo_do_usuario = serializers.IntegerField(required=False)
    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
    llm_ultimas_requests = serializers.CharField(
        required=False, default="gemini-2.0-flash"
    )
    doc_id = serializers.CharField(required=True)
    form_response_id = serializers.CharField(required=True)
    version = serializers.CharField(required=True)

    def get_obj(self):
        """Return the validated payload as a ``GerarDocumentoSerializerData``."""
        return GerarDocumentoSerializerData(**self.validated_data)  # type: ignore


@dataclass
class GerarDocumentoSerializerData(GerarDocumentoInitialSerializerData):
    """Typed mirror of ``GerarDocumentoSerializer.validated_data``."""

    files: List[FileInfoSerializerData]
    bubble_editor_version: str = "version-test"
    prompt_gerar_documento: str = ""
    user_message: str = ""
    num_chunks_retrieval: int = 20
    embedding_weight: float = 0.5
    bm25_weight: float = 0.5
    context_window: int = 3
    chunk_overlap: int = 800
    num_k_rerank: int = 20
    model_cohere_rerank: str = "rerank-english-v2.0"
    more_initial_chunks_for_reranking: int = 100
    claude_context_model: str = "claude-3-haiku-20240307"
    gpt_temperature: float = 0.0
    id_modelo_do_usuario: Optional[int] = None
    should_have_contextual_chunks: bool = False
    should_use_llama_parse: bool = False
    llm_ultimas_requests: str = "gemini-2.0-flash"
    doc_id: str = ""
    form_response_id: str = ""
    version: str = ""


class GerarDocumentoComPDFProprioSerializer(GerarDocumentoInitialSerializer):
    """Document-generation payload for user-supplied PDF uploads.

    Same tuning parameters as ``GerarDocumentoSerializer`` but keeps the
    inherited uploaded-``files`` field and has no Bubble/versioning fields.
    """

    # Drop the inherited field: this endpoint does not accept a system prompt.
    system_prompt = None
    prompt_gerar_documento = serializers.CharField(
        required=False, default=prompt_gerar_documento
    )
    user_message = serializers.CharField(required=False, default=user_message)
    num_chunks_retrieval = serializers.IntegerField(default=20)
    embedding_weight = serializers.FloatField(default=0.5)
    bm25_weight = serializers.FloatField(default=0.5)
    context_window = serializers.IntegerField(default=3)
    chunk_overlap = serializers.IntegerField(default=800)
    num_k_rerank = serializers.IntegerField(default=20)
    model_cohere_rerank = serializers.CharField(
        required=False, default="rerank-english-v2.0"
    )
    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
    claude_context_model = serializers.CharField(
        required=False, default="claude-3-haiku-20240307"
    )
    gpt_temperature = serializers.FloatField(default=0)
    id_modelo_do_usuario = serializers.IntegerField(required=False, default=11)
    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
    llm_ultimas_requests = serializers.CharField(required=False, default="gpt-4o-mini")

    def get_obj(self):
        """Return the validated payload as a ``GerarDocumentoSerializerData``.

        NOTE(review): this builds ``GerarDocumentoSerializerData`` rather than
        ``GerarDocumentoComPDFProprioSerializerData`` (which is otherwise
        unused). Works because the extra fields all have defaults — confirm
        whether the dedicated dataclass was meant to be used here.
        """
        return GerarDocumentoSerializerData(**self.validated_data)  # type: ignore


@dataclass
class GerarDocumentoComPDFProprioSerializerData(GerarDocumentoInitialSerializerData):
    """Typed mirror of ``GerarDocumentoComPDFProprioSerializer.validated_data``."""

    prompt_gerar_documento: Optional[str] = None
    user_message: Optional[str] = None
    num_chunks_retrieval: int = 20
    embedding_weight: float = 0.5
    bm25_weight: float = 0.5
    context_window: int = 3
    chunk_overlap: int = 800
    num_k_rerank: int = 20
    model_cohere_rerank: str = "rerank-english-v2.0"
    more_initial_chunks_for_reranking: int = 100
    claude_context_model: str = "claude-3-haiku-20240307"
    gpt_temperature: float = 0.0
    id_modelo_do_usuario: int = 11
    should_have_contextual_chunks: bool = False
    should_use_llama_parse: bool = False
    llm_ultimas_requests: str = "gpt-4o-mini"


class GerarEmentaSerializer(serializers.Serializer):
    """Payload for the ementa (summary) endpoint: stored files plus chunking knobs."""

    files = serializers.ListField(child=FileInfoSerializer(), required=True)
    user_message = serializers.CharField(required=False, default="")
    chunk_size = serializers.IntegerField(required=False, default=3500)
    chunk_overlap = serializers.IntegerField(required=False, default=800)
    # Value interpolated into the Bubble request URL.
    bubble_editor_version = serializers.CharField(
        required=False, default="version-test"
    )
    doc_id = serializers.CharField(required=True)
    form_response_id = serializers.CharField(required=True)
    version = serializers.CharField(required=True)