luanpoppe committed
Commit 967a079 · Parent(s): 9644984

refactor: improving the gerar_documento serializer
gerar_documento/serializer.py CHANGED
@@ -6,42 +6,56 @@ from _utils.gerar_relatorio_modelo_usuario.prompts import (
     prompt_gerar_documento,
     prompt_auxiliar_padrao,
 )
+from gerar_documento.serializer_base import (
+    GerarDocumentoParametros,
+    GerarDocumentoParametrosData,
+)
 from setup.environment import default_model
 from django.core.files.uploadedfile import UploadedFile
 
 user_message = "What are the main points of this document?"
 
-
-prompt_template = """
-Based on the following context, provide multiple key points from the document.
-For each point, create a new paragraph.
-Each paragraph should be a complete, self-contained insight.
-
-Context: {context}
-
-Key points:
-"""
-
-
 class GerarDocumentoInitialSerializer(serializers.Serializer):
     files = serializers.ListField(child=serializers.FileField(), required=True)
-    system_prompt = serializers.CharField(required=False, default=prompt_template)
-    user_message = serializers.CharField(required=False, default="")
+    user_message = serializers.CharField(required=False, default=user_message)
     model = serializers.CharField(required=False, default=default_model)
     hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
     chunk_size = serializers.IntegerField(required=False, default=3500)
     chunk_overlap = serializers.IntegerField(required=False, default=800)
+    prompt_gerar_documento = serializers.CharField(
+        required=False, default=prompt_gerar_documento
+    )
+    prompt_gerar_documento_etapa_2 = serializers.CharField(required=False)
+    prompt_gerar_documento_etapa_3 = serializers.CharField(required=False)
+    model_cohere_rerank = serializers.CharField(
+        required=False, default="rerank-english-v2.0"
+    )
+    claude_context_model = serializers.CharField(
+        required=False, default="claude-3-haiku-20240307"
+    )
+    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
+    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
+    llm_ultimas_requests = serializers.CharField(
+        required=False, default="gemini-2.0-flash"
+    )
 
 
 @dataclass
 class GerarDocumentoInitialSerializerData:
     files: List[dict]
-    system_prompt: str = prompt_template
     user_message: str = ""
     model: str = default_model
     hf_embedding: str = "all-MiniLM-L6-v2"
     chunk_size: int = 3500
     chunk_overlap: int = 800
+    prompt_gerar_documento: str = ""
+    prompt_gerar_documento_etapa_2: Union[str, None] = None
+    prompt_gerar_documento_etapa_3: Union[str, None] = None
+    model_cohere_rerank: str = "rerank-english-v2.0"
+    claude_context_model: str = "claude-3-haiku-20240307"
+    should_have_contextual_chunks: bool = False
+    should_use_llama_parse: bool = False
+    llm_ultimas_requests: str = "gemini-2.0-flash"
 
 
 class FileInfoSerializer(serializers.Serializer):
@@ -57,40 +71,13 @@ class FileInfoSerializerData:
     link_arquivo: str
 
 
-class GerarDocumentoSerializer(GerarDocumentoInitialSerializer):
-    system_prompt = None
-
+class GerarDocumentoSerializer(
+    GerarDocumentoInitialSerializer, GerarDocumentoParametros
+):
     files = serializers.ListField(child=FileInfoSerializer(), required=True)
     bubble_editor_version = serializers.CharField(
         required=False, default="version-test"
     )  # Será o valor utilizado dentro da URL da requisição pro Bubble
-
-    prompt_gerar_documento = serializers.CharField(
-        required=False, default=prompt_gerar_documento
-    )
-    prompt_gerar_documento_etapa_2 = serializers.CharField(required=False)
-    prompt_gerar_documento_etapa_3 = serializers.CharField(required=False)
-    user_message = serializers.CharField(required=False, default=user_message)
-    num_chunks_retrieval = serializers.IntegerField(default=20)
-    embedding_weight = serializers.FloatField(default=0.5)
-    bm25_weight = serializers.FloatField(default=0.5)
-    context_window = serializers.IntegerField(default=3)
-    chunk_overlap = serializers.IntegerField(default=800)
-    num_k_rerank = serializers.IntegerField(default=20)
-    model_cohere_rerank = serializers.CharField(
-        required=False, default="rerank-english-v2.0"
-    )
-    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
-    claude_context_model = serializers.CharField(
-        required=False, default="claude-3-haiku-20240307"
-    )
-    gpt_temperature = serializers.FloatField(default=0)
-    id_modelo_do_usuario = serializers.IntegerField(required=False)
-    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
-    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
-    llm_ultimas_requests = serializers.CharField(
-        required=False, default="gemini-2.0-flash"
-    )
     doc_id = serializers.CharField(required=True)
     form_response_id = serializers.CharField(required=True)
     version = serializers.CharField(required=True)
@@ -100,86 +87,29 @@ class GerarDocumentoSerializer(GerarDocumentoInitialSerializer):
 
 
 @dataclass
-class GerarDocumentoSerializerData(GerarDocumentoInitialSerializerData):
-    prompt_gerar_documento_etapa_2: Union[str, None] = None
-    prompt_gerar_documento_etapa_3: Union[str, None] = None
+class GerarDocumentoSerializerData(
+    GerarDocumentoParametrosData, GerarDocumentoInitialSerializerData
+):
     files: List[FileInfoSerializerData]
     bubble_editor_version: str = "version-test"
-    prompt_gerar_documento: str = ""
-    user_message: str = ""
-    num_chunks_retrieval: int = 20
-    embedding_weight: float = 0.5
-    bm25_weight: float = 0.5
-    context_window: int = 3
-    chunk_overlap: int = 800
-    num_k_rerank: int = 20
-    model_cohere_rerank: str = "rerank-english-v2.0"
-    more_initial_chunks_for_reranking: int = 100
-    claude_context_model: str = "claude-3-haiku-20240307"
-    gpt_temperature: float = 0.0
-    id_modelo_do_usuario: Optional[int] = None
-    should_have_contextual_chunks: bool = False
-    should_use_llama_parse: bool = False
-    llm_ultimas_requests: str = "gemini-2.0-flash"
+
     doc_id: str = ""
     form_response_id: str = ""
     version: str = ""
 
 
-class GerarDocumentoComPDFProprioSerializer(GerarDocumentoInitialSerializer):
-    system_prompt = None
-    # prompt_auxiliar = serializers.CharField(
-    #     required=False, default=prompt_auxiliar_padrao
-    # )
-    prompt_gerar_documento = serializers.CharField(
-        required=False, default=prompt_gerar_documento
-    )
-    prompt_gerar_documento_etapa_2 = serializers.CharField(required=False)
-    prompt_gerar_documento_etapa_3 = serializers.CharField(required=False)
-    user_message = serializers.CharField(required=False, default=user_message)
-    num_chunks_retrieval = serializers.IntegerField(default=20)
-    embedding_weight = serializers.FloatField(default=0.5)
-    bm25_weight = serializers.FloatField(default=0.5)
-    context_window = serializers.IntegerField(default=3)
-    chunk_overlap = serializers.IntegerField(default=800)
-    num_k_rerank = serializers.IntegerField(default=20)
-    model_cohere_rerank = serializers.CharField(
-        required=False, default="rerank-english-v2.0"
-    )
-    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
-    claude_context_model = serializers.CharField(
-        required=False, default="claude-3-haiku-20240307"
-    )
-    gpt_temperature = serializers.FloatField(default=0)
-    id_modelo_do_usuario = serializers.IntegerField(required=False, default=11)
-    should_have_contextual_chunks = serializers.BooleanField(default=False)  # type: ignore
-    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore
-    llm_ultimas_requests = serializers.CharField(required=False, default="gpt-4o-mini")
-
+class GerarDocumentoComPDFProprioSerializer(
+    GerarDocumentoInitialSerializer, GerarDocumentoParametros
+):
     def get_obj(self):
         return GerarDocumentoSerializerData(**self.validated_data)  # type: ignore
 
 
 @dataclass
-class GerarDocumentoComPDFProprioSerializerData(GerarDocumentoInitialSerializerData):
-    prompt_gerar_documento_etapa_2: Union[str, None] = None
-    prompt_gerar_documento_etapa_3: Union[str, None] = None
-    prompt_gerar_documento: Optional[str] = field(default=None)
-    user_message: Optional[str] = field(default=None)
-    num_chunks_retrieval: int = field(default=20)
-    embedding_weight: float = field(default=0.5)
-    bm25_weight: float = field(default=0.5)
-    context_window: int = field(default=3)
-    chunk_overlap: int = field(default=800)
-    num_k_rerank: int = field(default=20)
-    model_cohere_rerank: str = field(default="rerank-english-v2.0")
-    more_initial_chunks_for_reranking: int = field(default=100)
-    claude_context_model: str = field(default="claude-3-haiku-20240307")
-    gpt_temperature: float = field(default=0.0)
-    id_modelo_do_usuario: int = field(default=11)
-    should_have_contextual_chunks: bool = field(default=False)
-    should_use_llama_parse: bool = field(default=False)
-    llm_ultimas_requests: str = field(default="gpt-4o-mini")
+class GerarDocumentoComPDFProprioSerializerData(
+    GerarDocumentoParametrosData, GerarDocumentoInitialSerializerData
+):
+    pass
 
 
 class GerarEmentaSerializer(serializers.Serializer):
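
The net effect on serializer.py: the prompt and model fields move up into GerarDocumentoInitialSerializer, the retrieval tuning knobs move out to the new GerarDocumentoParametros / GerarDocumentoParametrosData pair, and get_obj() still just unpacks validated_data into the matching dataclass. A minimal sketch of that handoff, using trimmed stand-in dataclasses (FileInfoData, GerarDocumentoData and the example payload are illustrative names, not part of the repo):

from dataclasses import dataclass, field
from typing import List

# Trimmed stand-ins for FileInfoSerializerData / GerarDocumentoSerializerData;
# only a few of the real fields are kept for illustration.
@dataclass
class FileInfoData:
    link_arquivo: str = ""

@dataclass
class GerarDocumentoData:
    files: List[FileInfoData] = field(default_factory=list)
    user_message: str = ""
    num_chunks_retrieval: int = 20                 # default carried by the shared parameters dataclass
    llm_ultimas_requests: str = "gemini-2.0-flash"
    doc_id: str = ""

# get_obj() in the diff expands serializer.validated_data into the dataclass:
validated_data = {
    "files": [FileInfoData(link_arquivo="https://example.com/doc.pdf")],
    "doc_id": "123",
}
obj = GerarDocumentoData(**validated_data)
assert obj.num_chunks_retrieval == 20              # unspecified fields fall back to the shared defaults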
gerar_documento/serializer_base.py ADDED
@@ -0,0 +1,31 @@
+from dataclasses import dataclass, field
+from email.policy import default
+from typing import List, Optional, Union
+from rest_framework import serializers
+from _utils.gerar_relatorio_modelo_usuario.prompts import (
+    prompt_gerar_documento,
+    prompt_auxiliar_padrao,
+)
+from setup.environment import default_model
+from django.core.files.uploadedfile import UploadedFile
+
+
+class GerarDocumentoParametros:
+    num_chunks_retrieval = serializers.IntegerField(default=20)
+    embedding_weight = serializers.FloatField(default=0.5)
+    bm25_weight = serializers.FloatField(default=0.5)
+    context_window = serializers.IntegerField(default=3)
+    num_k_rerank = serializers.IntegerField(default=20)
+    more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
+    gpt_temperature = serializers.FloatField(default=0)
+
+
+@dataclass
+class GerarDocumentoParametrosData:
+    num_chunks_retrieval: int = 20
+    embedding_weight: float = 0.5
+    bm25_weight: float = 0.5
+    context_window: int = 3
+    num_k_rerank: int = 20
+    more_initial_chunks_for_reranking: int = 100
+    gpt_temperature: float = 0.0
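
GerarDocumentoParametrosData is mixed into the data classes in serializer.py, e.g. GerarDocumentoSerializerData(GerarDocumentoParametrosData, GerarDocumentoInitialSerializerData). Dataclasses collect fields base-first along the MRO, and a redeclared field keeps its original slot, which is why the non-default files field can still come before the defaulted parameters. A standalone sketch of that composition with simplified stand-ins (the names InitialData, ParametrosData, DocumentoData and the reduced field set are illustrative):

from dataclasses import dataclass, fields
from typing import List

@dataclass
class InitialData:                 # stands in for GerarDocumentoInitialSerializerData
    files: List[dict]              # declared first, no default
    chunk_size: int = 3500

@dataclass
class ParametrosData:              # stands in for GerarDocumentoParametrosData
    num_chunks_retrieval: int = 20
    gpt_temperature: float = 0.0

@dataclass
class DocumentoData(ParametrosData, InitialData):
    # Redeclaring files without a default replaces the base definition in place,
    # so it stays the first __init__ argument and no
    # "non-default argument follows default argument" error is raised.
    files: List[dict]
    doc_id: str = ""

print([f.name for f in fields(DocumentoData)])
# -> ['files', 'chunk_size', 'num_chunks_retrieval', 'gpt_temperature', 'doc_id']

obj = DocumentoData(files=[{"link_arquivo": "https://example.com/a.pdf"}])
assert obj.num_chunks_retrieval == 20 and obj.chunk_size == 3500

The same ordering argument is what lets GerarDocumentoComPDFProprioSerializerData in the diff shrink to a bare pass: its only non-default field, files, is declared first in the base class.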
gerar_documento/views.py CHANGED
@@ -86,8 +86,9 @@ class GerarDocumentoComPDFProprioView(AsyncAPIView):
     async def post(self, request):
         self.axiom_instance.generate_new_uuid()
         print(f"\n\nDATA E HORA DA REQUISIÇÃO: {datetime.now()}")
-        self.axiom_instance.send_axiom("COMEÇOU NOVA REQUISIÇÃO")
-        self.axiom_instance.send_axiom(f"request.data: {request.data}")
+        self.axiom_instance.send_axiom(
+            f"COMEÇOU NOVA REQUISIÇÃO - request.data: {request.data}"
+        )
         serializer = GerarDocumentoComPDFProprioSerializer(data=request.data)
 
         if serializer.is_valid(raise_exception=True):
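
The views.py change only folds the two Axiom events into a single message per request. A minimal sketch of the resulting call shape, assuming an axiom_instance that exposes send_axiom(str) as the diff does (the FakeAxiom stand-in is illustrative, not the project's wrapper):

class FakeAxiom:
    """Illustrative stand-in; only the send_axiom(str) method used in the view is assumed."""
    def send_axiom(self, message: str) -> None:
        print(f"[axiom] {message}")

axiom_instance = FakeAxiom()
request_data = {"doc_id": "123", "form_response_id": "abc", "version": "live"}

# One event carrying both the "request started" marker and the payload,
# instead of two separate send_axiom calls:
axiom_instance.send_axiom(f"COMEÇOU NOVA REQUISIÇÃO - request.data: {request_data}")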