Spaces:
Running
Running
File size: 7,950 Bytes
756fca0 1286e81 12d3e1a cb23311 1286e81 a263183 756fca0 1286e81 bdf043b 756fca0 cb23311 756fca0 bdf043b 1286e81 cb23311 4a04d77 12d3e1a 1286e81 55f46c1 1286e81 78209bc 55f46c1 1286e81 55f46c1 1286e81 23087eb b374298 8f3dc39 dc376b6 095b5f1 7eb86f7 756fca0 a263183 bdf043b 7eb86f7 4a04d77 7eb86f7 55f46c1 7eb86f7 78209bc 55f46c1 7eb86f7 55f46c1 7eb86f7 b374298 e70ffc1 3736ce1 a263183 756fca0 a1f037d 756fca0 a263183 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
from dataclasses import dataclass, field
from typing import List, Optional
from rest_framework import serializers
from _utils.gerar_relatorio_modelo_usuario.prompts import (
prompt_gerar_documento,
prompt_auxiliar_padrao,
)
from setup.environment import default_model
from django.core.files.uploadedfile import UploadedFile
user_message = "What are the main points of this document?"
prompt_template = """
Based on the following context, provide multiple key points from the document.
For each point, create a new paragraph.
Each paragraph should be a complete, self-contained insight.
Context: {context}
Key points:
"""
class GerarDocumentoInitialSerializer(serializers.Serializer):
    """Base request serializer for document-generation endpoints.

    Validates raw file uploads plus the prompt/model/chunking parameters
    shared by the serializers that inherit from it.
    """

    # Raw uploaded files to process (required).
    files = serializers.ListField(child=serializers.FileField(), required=True)
    # System prompt for the LLM; falls back to the generic key-points template.
    system_prompt = serializers.CharField(required=False, default=prompt_template)
    # Optional user message; defaults to empty (subclasses override the default).
    user_message = serializers.CharField(required=False, default="")
    # LLM identifier; default comes from setup.environment.
    model = serializers.CharField(required=False, default=default_model)
    # Embedding model name — presumably a HuggingFace sentence-transformers id.
    hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
    # Text-splitting parameters (size / overlap — units not visible here;
    # presumably characters, confirm against the splitter implementation).
    chunk_size = serializers.IntegerField(required=False, default=3500)
    chunk_overlap = serializers.IntegerField(required=False, default=800)
@dataclass
class GerarDocumentoInitialSerializerData:
    """Typed plain-data counterpart of GerarDocumentoInitialSerializer.

    Field defaults mirror the serializer's field defaults so an instance
    built from validated_data round-trips the same values.
    """

    # Validated file payloads (dicts as produced by the serializer).
    files: List[dict]
    system_prompt: str = prompt_template
    user_message: str = ""
    model: str = default_model
    hf_embedding: str = "all-MiniLM-L6-v2"
    chunk_size: int = 3500
    chunk_overlap: int = 800
class FileInfoSerializer(serializers.Serializer):
    """Reference to an already-stored file (id, type, and download URL)."""

    unique_id = serializers.CharField(max_length=255)
    # File type label ("tipo_arquivo" = file type).
    tipo_arquivo = serializers.CharField(max_length=255)
    # URL where the file content can be fetched ("link_arquivo" = file link).
    link_arquivo = serializers.URLField()
@dataclass
class FileInfoSerializerData:
    """Typed plain-data counterpart of FileInfoSerializer."""

    unique_id: str
    tipo_arquivo: str
    link_arquivo: str
class GerarDocumentoSerializer(GerarDocumentoInitialSerializer):
    """Full request serializer for generating a document from files that are
    referenced by URL (FileInfoSerializer entries) rather than uploaded raw.

    Adds retrieval/rerank tuning parameters and Bubble-integration
    identifiers on top of the base serializer.
    """

    # Setting an inherited DRF field to None removes it from this serializer.
    system_prompt = None
    # Files are references (id/type/URL), not raw uploads as in the parent.
    files = serializers.ListField(child=FileInfoSerializer(), required=True)
    bubble_editor_version = serializers.CharField(
        required=False, default="version-test"
    )  # Value used inside the URL of the request sent to Bubble
    # prompt_auxiliar = serializers.CharField(
    #     required=False, default=prompt_auxiliar_padrao
    # )
    # Main generation prompt; default is the module-level imported prompt.
    prompt_gerar_documento = serializers.CharField(
        required=False, default=prompt_gerar_documento
    )
    user_message = serializers.CharField(required=False, default=user_message)
    # Retrieval parameters — names suggest hybrid embedding+BM25 retrieval
    # with a context window around each hit; confirm against the pipeline.
    num_chunks_retrieval = serializers.IntegerField(default=20)
    embedding_weight = serializers.FloatField(default=0.5)
    bm25_weight = serializers.FloatField(default=0.5)
    context_window = serializers.IntegerField(default=3)
    chunk_overlap = serializers.IntegerField(default=800)
    # Reranking configuration (Cohere rerank model + candidate pool size).
    num_k_rerank = serializers.IntegerField(default=20)
    model_cohere_rerank = serializers.CharField(
        required=False, default="rerank-english-v2.0"
    )
    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
    # Claude model id — presumably used for contextualizing chunks; verify.
    claude_context_model = serializers.CharField(
        required=False, default="claude-3-haiku-20240307"
    )
    gpt_temperature = serializers.FloatField(default=0)
    id_modelo_do_usuario = serializers.IntegerField(required=False)
    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
    # Model used for the final LLM requests of the pipeline.
    llm_ultimas_requests = serializers.CharField(
        required=False, default="gemini-2.0-flash"
    )
    # Identifiers tying the request back to the originating Bubble document/form.
    doc_id = serializers.CharField(required=True)
    form_response_id = serializers.CharField(required=True)
    version = serializers.CharField(required=True)

    def get_obj(self):
        """Return validated_data packed into a GerarDocumentoSerializerData."""
        return GerarDocumentoSerializerData(**self.validated_data)  # type: ignore
@dataclass
class GerarDocumentoSerializerData(GerarDocumentoInitialSerializerData):
    """Typed plain-data counterpart of GerarDocumentoSerializer.

    Defaults mirror the serializer's defaults, with one divergence:
    ``prompt_gerar_documento`` defaults to "" here while the serializer
    defaults it to the imported prompt — NOTE(review): harmless while the
    serializer always supplies the key, but worth reconciling.
    """

    # Overrides the parent's List[dict] with the typed file-reference records.
    files: List[FileInfoSerializerData]
    bubble_editor_version: str = "version-test"
    prompt_gerar_documento: str = ""
    user_message: str = ""
    num_chunks_retrieval: int = 20
    embedding_weight: float = 0.5
    bm25_weight: float = 0.5
    context_window: int = 3
    chunk_overlap: int = 800
    num_k_rerank: int = 20
    model_cohere_rerank: str = "rerank-english-v2.0"
    more_initial_chunks_for_reranking: int = 100
    claude_context_model: str = "claude-3-haiku-20240307"
    gpt_temperature: float = 0.0
    id_modelo_do_usuario: Optional[int] = None
    should_have_contextual_chunks: bool = False
    should_use_llama_parse: bool = False
    llm_ultimas_requests: str = "gemini-2.0-flash"
    doc_id: str = ""
    form_response_id: str = ""
    version: str = ""
class GerarDocumentoComPDFProprioSerializer(GerarDocumentoInitialSerializer):
    """Request serializer for generating a document from the user's own
    uploaded PDF (raw ``files`` uploads inherited from the parent).

    Mirrors GerarDocumentoSerializer's tuning parameters but keeps raw
    uploads, has no Bubble identifiers, and uses different defaults
    (``id_modelo_do_usuario=11``, ``llm_ultimas_requests="gpt-4o-mini"``).
    """

    # Setting an inherited DRF field to None removes it from this serializer.
    system_prompt = None
    # Main generation prompt; default is the module-level imported prompt.
    prompt_gerar_documento = serializers.CharField(
        required=False, default=prompt_gerar_documento
    )
    user_message = serializers.CharField(required=False, default=user_message)
    # Retrieval parameters — names suggest hybrid embedding+BM25 retrieval
    # with a context window around each hit; confirm against the pipeline.
    num_chunks_retrieval = serializers.IntegerField(default=20)
    embedding_weight = serializers.FloatField(default=0.5)
    bm25_weight = serializers.FloatField(default=0.5)
    context_window = serializers.IntegerField(default=3)
    chunk_overlap = serializers.IntegerField(default=800)
    # Reranking configuration (Cohere rerank model + candidate pool size).
    num_k_rerank = serializers.IntegerField(default=20)
    model_cohere_rerank = serializers.CharField(
        required=False, default="rerank-english-v2.0"
    )
    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
    # Claude model id — presumably used for contextualizing chunks; verify.
    claude_context_model = serializers.CharField(
        required=False, default="claude-3-haiku-20240307"
    )
    gpt_temperature = serializers.FloatField(default=0)
    id_modelo_do_usuario = serializers.IntegerField(required=False, default=11)
    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
    llm_ultimas_requests = serializers.CharField(required=False, default="gpt-4o-mini")

    def get_obj(self):
        """Return validated_data packed into the matching typed data object.

        Fix: previously instantiated GerarDocumentoSerializerData, whose
        defaults disagree with this serializer's (e.g. gemini vs gpt-4o-mini,
        id_modelo_do_usuario None vs 11) and which carries Bubble fields
        (doc_id, form_response_id, version) this serializer never validates.
        The dedicated GerarDocumentoComPDFProprioSerializerData matches this
        serializer field-for-field.
        """
        return GerarDocumentoComPDFProprioSerializerData(**self.validated_data)  # type: ignore
@dataclass
class GerarDocumentoComPDFProprioSerializerData(GerarDocumentoInitialSerializerData):
    """Typed plain-data counterpart of GerarDocumentoComPDFProprioSerializer.

    Defaults mirror that serializer's field defaults. Plain default values
    are used directly: ``x: T = v`` is exactly equivalent to
    ``x: T = field(default=v)``, just less noisy.
    """

    prompt_gerar_documento: Optional[str] = None
    user_message: Optional[str] = None
    num_chunks_retrieval: int = 20
    embedding_weight: float = 0.5
    bm25_weight: float = 0.5
    context_window: int = 3
    chunk_overlap: int = 800
    num_k_rerank: int = 20
    model_cohere_rerank: str = "rerank-english-v2.0"
    more_initial_chunks_for_reranking: int = 100
    claude_context_model: str = "claude-3-haiku-20240307"
    gpt_temperature: float = 0.0
    id_modelo_do_usuario: int = 11
    should_have_contextual_chunks: bool = False
    should_use_llama_parse: bool = False
    llm_ultimas_requests: str = "gpt-4o-mini"
class GerarEmentaSerializer(serializers.Serializer):
    """Request serializer for generating an "ementa" (summary/abstract —
    confirm exact domain meaning) from files referenced by URL.

    Standalone Serializer (does not inherit the initial serializer); only
    chunking, Bubble versioning, and document identifiers are accepted.
    """

    # File references (id/type/URL), same shape as GerarDocumentoSerializer.
    files = serializers.ListField(child=FileInfoSerializer(), required=True)
    user_message = serializers.CharField(required=False, default="")
    # Text-splitting parameters, matching the defaults used elsewhere in file.
    chunk_size = serializers.IntegerField(required=False, default=3500)
    chunk_overlap = serializers.IntegerField(required=False, default=800)
    bubble_editor_version = serializers.CharField(
        required=False, default="version-test"
    )  # Value used inside the URL of the request sent to Bubble
    # Identifiers tying the request back to the originating Bubble document/form.
    doc_id = serializers.CharField(required=True)
    form_response_id = serializers.CharField(required=True)
    version = serializers.CharField(required=True)
|