Spaces:
Running
Running
luanpoppe
committed on
Commit
·
967a079
1
Parent(s):
9644984
refactor: melhorando serializer do gerar documentos
Browse files
- gerar_documento/serializer.py +43 -113
- gerar_documento/serializer_base.py +31 -0
- gerar_documento/views.py +3 -2
gerar_documento/serializer.py
CHANGED
@@ -6,42 +6,56 @@ from _utils.gerar_relatorio_modelo_usuario.prompts import (
|
|
6 |
prompt_gerar_documento,
|
7 |
prompt_auxiliar_padrao,
|
8 |
)
|
|
|
|
|
|
|
|
|
9 |
from setup.environment import default_model
|
10 |
from django.core.files.uploadedfile import UploadedFile
|
11 |
|
12 |
user_message = "What are the main points of this document?"
|
13 |
|
14 |
-
|
15 |
-
prompt_template = """
|
16 |
-
Based on the following context, provide multiple key points from the document.
|
17 |
-
For each point, create a new paragraph.
|
18 |
-
Each paragraph should be a complete, self-contained insight.
|
19 |
-
|
20 |
-
Context: {context}
|
21 |
-
|
22 |
-
Key points:
|
23 |
-
"""
|
24 |
-
|
25 |
-
|
26 |
class GerarDocumentoInitialSerializer(serializers.Serializer):
|
27 |
files = serializers.ListField(child=serializers.FileField(), required=True)
|
28 |
-
|
29 |
-
user_message = serializers.CharField(required=False, default="")
|
30 |
model = serializers.CharField(required=False, default=default_model)
|
31 |
hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
|
32 |
chunk_size = serializers.IntegerField(required=False, default=3500)
|
33 |
chunk_overlap = serializers.IntegerField(required=False, default=800)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
|
36 |
@dataclass
|
37 |
class GerarDocumentoInitialSerializerData:
|
38 |
files: List[dict]
|
39 |
-
system_prompt: str = prompt_template
|
40 |
user_message: str = ""
|
41 |
model: str = default_model
|
42 |
hf_embedding: str = "all-MiniLM-L6-v2"
|
43 |
chunk_size: int = 3500
|
44 |
chunk_overlap: int = 800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
|
47 |
class FileInfoSerializer(serializers.Serializer):
|
@@ -57,40 +71,13 @@ class FileInfoSerializerData:
|
|
57 |
link_arquivo: str
|
58 |
|
59 |
|
60 |
-
class GerarDocumentoSerializer(
|
61 |
-
|
62 |
-
|
63 |
files = serializers.ListField(child=FileInfoSerializer(), required=True)
|
64 |
bubble_editor_version = serializers.CharField(
|
65 |
required=False, default="version-test"
|
66 |
) # Será o valor utilizado dentro da URL da requisição pro Bubble
|
67 |
-
|
68 |
-
prompt_gerar_documento = serializers.CharField(
|
69 |
-
required=False, default=prompt_gerar_documento
|
70 |
-
)
|
71 |
-
prompt_gerar_documento_etapa_2 = serializers.CharField(required=False)
|
72 |
-
prompt_gerar_documento_etapa_3 = serializers.CharField(required=False)
|
73 |
-
user_message = serializers.CharField(required=False, default=user_message)
|
74 |
-
num_chunks_retrieval = serializers.IntegerField(default=20)
|
75 |
-
embedding_weight = serializers.FloatField(default=0.5)
|
76 |
-
bm25_weight = serializers.FloatField(default=0.5)
|
77 |
-
context_window = serializers.IntegerField(default=3)
|
78 |
-
chunk_overlap = serializers.IntegerField(default=800)
|
79 |
-
num_k_rerank = serializers.IntegerField(default=20)
|
80 |
-
model_cohere_rerank = serializers.CharField(
|
81 |
-
required=False, default="rerank-english-v2.0"
|
82 |
-
)
|
83 |
-
more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
|
84 |
-
claude_context_model = serializers.CharField(
|
85 |
-
required=False, default="claude-3-haiku-20240307"
|
86 |
-
)
|
87 |
-
gpt_temperature = serializers.FloatField(default=0)
|
88 |
-
id_modelo_do_usuario = serializers.IntegerField(required=False)
|
89 |
-
should_have_contextual_chunks = serializers.BooleanField(default=False) # type: ignore
|
90 |
-
should_use_llama_parse = serializers.BooleanField(required=False, default=False) # type: ignore
|
91 |
-
llm_ultimas_requests = serializers.CharField(
|
92 |
-
required=False, default="gemini-2.0-flash"
|
93 |
-
)
|
94 |
doc_id = serializers.CharField(required=True)
|
95 |
form_response_id = serializers.CharField(required=True)
|
96 |
version = serializers.CharField(required=True)
|
@@ -100,86 +87,29 @@ class GerarDocumentoSerializer(GerarDocumentoInitialSerializer):
|
|
100 |
|
101 |
|
102 |
@dataclass
|
103 |
-
class GerarDocumentoSerializerData(
|
104 |
-
|
105 |
-
|
106 |
files: List[FileInfoSerializerData]
|
107 |
bubble_editor_version: str = "version-test"
|
108 |
-
|
109 |
-
user_message: str = ""
|
110 |
-
num_chunks_retrieval: int = 20
|
111 |
-
embedding_weight: float = 0.5
|
112 |
-
bm25_weight: float = 0.5
|
113 |
-
context_window: int = 3
|
114 |
-
chunk_overlap: int = 800
|
115 |
-
num_k_rerank: int = 20
|
116 |
-
model_cohere_rerank: str = "rerank-english-v2.0"
|
117 |
-
more_initial_chunks_for_reranking: int = 100
|
118 |
-
claude_context_model: str = "claude-3-haiku-20240307"
|
119 |
-
gpt_temperature: float = 0.0
|
120 |
-
id_modelo_do_usuario: Optional[int] = None
|
121 |
-
should_have_contextual_chunks: bool = False
|
122 |
-
should_use_llama_parse: bool = False
|
123 |
-
llm_ultimas_requests: str = "gemini-2.0-flash"
|
124 |
doc_id: str = ""
|
125 |
form_response_id: str = ""
|
126 |
version: str = ""
|
127 |
|
128 |
|
129 |
-
class GerarDocumentoComPDFProprioSerializer(
|
130 |
-
|
131 |
-
|
132 |
-
# required=False, default=prompt_auxiliar_padrao
|
133 |
-
# )
|
134 |
-
prompt_gerar_documento = serializers.CharField(
|
135 |
-
required=False, default=prompt_gerar_documento
|
136 |
-
)
|
137 |
-
prompt_gerar_documento_etapa_2 = serializers.CharField(required=False)
|
138 |
-
prompt_gerar_documento_etapa_3 = serializers.CharField(required=False)
|
139 |
-
user_message = serializers.CharField(required=False, default=user_message)
|
140 |
-
num_chunks_retrieval = serializers.IntegerField(default=20)
|
141 |
-
embedding_weight = serializers.FloatField(default=0.5)
|
142 |
-
bm25_weight = serializers.FloatField(default=0.5)
|
143 |
-
context_window = serializers.IntegerField(default=3)
|
144 |
-
chunk_overlap = serializers.IntegerField(default=800)
|
145 |
-
num_k_rerank = serializers.IntegerField(default=20)
|
146 |
-
model_cohere_rerank = serializers.CharField(
|
147 |
-
required=False, default="rerank-english-v2.0"
|
148 |
-
)
|
149 |
-
more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
|
150 |
-
claude_context_model = serializers.CharField(
|
151 |
-
required=False, default="claude-3-haiku-20240307"
|
152 |
-
)
|
153 |
-
gpt_temperature = serializers.FloatField(default=0)
|
154 |
-
id_modelo_do_usuario = serializers.IntegerField(required=False, default=11)
|
155 |
-
should_have_contextual_chunks = serializers.BooleanField(default=False) # type: ignore
|
156 |
-
should_use_llama_parse = serializers.BooleanField(required=False, default=False) # type: ignore
|
157 |
-
llm_ultimas_requests = serializers.CharField(required=False, default="gpt-4o-mini")
|
158 |
-
|
159 |
def get_obj(self):
|
160 |
return GerarDocumentoSerializerData(**self.validated_data) # type: ignore
|
161 |
|
162 |
|
163 |
@dataclass
|
164 |
-
class GerarDocumentoComPDFProprioSerializerData(
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
user_message: Optional[str] = field(default=None)
|
169 |
-
num_chunks_retrieval: int = field(default=20)
|
170 |
-
embedding_weight: float = field(default=0.5)
|
171 |
-
bm25_weight: float = field(default=0.5)
|
172 |
-
context_window: int = field(default=3)
|
173 |
-
chunk_overlap: int = field(default=800)
|
174 |
-
num_k_rerank: int = field(default=20)
|
175 |
-
model_cohere_rerank: str = field(default="rerank-english-v2.0")
|
176 |
-
more_initial_chunks_for_reranking: int = field(default=100)
|
177 |
-
claude_context_model: str = field(default="claude-3-haiku-20240307")
|
178 |
-
gpt_temperature: float = field(default=0.0)
|
179 |
-
id_modelo_do_usuario: int = field(default=11)
|
180 |
-
should_have_contextual_chunks: bool = field(default=False)
|
181 |
-
should_use_llama_parse: bool = field(default=False)
|
182 |
-
llm_ultimas_requests: str = field(default="gpt-4o-mini")
|
183 |
|
184 |
|
185 |
class GerarEmentaSerializer(serializers.Serializer):
|
|
|
6 |
prompt_gerar_documento,
|
7 |
prompt_auxiliar_padrao,
|
8 |
)
|
9 |
+
from gerar_documento.serializer_base import (
|
10 |
+
GerarDocumentoParametros,
|
11 |
+
GerarDocumentoParametrosData,
|
12 |
+
)
|
13 |
from setup.environment import default_model
|
14 |
from django.core.files.uploadedfile import UploadedFile
|
15 |
|
16 |
user_message = "What are the main points of this document?"
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
class GerarDocumentoInitialSerializer(serializers.Serializer):
|
19 |
files = serializers.ListField(child=serializers.FileField(), required=True)
|
20 |
+
user_message = serializers.CharField(required=False, default=user_message)
|
|
|
21 |
model = serializers.CharField(required=False, default=default_model)
|
22 |
hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
|
23 |
chunk_size = serializers.IntegerField(required=False, default=3500)
|
24 |
chunk_overlap = serializers.IntegerField(required=False, default=800)
|
25 |
+
prompt_gerar_documento = serializers.CharField(
|
26 |
+
required=False, default=prompt_gerar_documento
|
27 |
+
)
|
28 |
+
prompt_gerar_documento_etapa_2 = serializers.CharField(required=False)
|
29 |
+
prompt_gerar_documento_etapa_3 = serializers.CharField(required=False)
|
30 |
+
model_cohere_rerank = serializers.CharField(
|
31 |
+
required=False, default="rerank-english-v2.0"
|
32 |
+
)
|
33 |
+
claude_context_model = serializers.CharField(
|
34 |
+
required=False, default="claude-3-haiku-20240307"
|
35 |
+
)
|
36 |
+
should_have_contextual_chunks = serializers.BooleanField(default=False) # type: ignore
|
37 |
+
should_use_llama_parse = serializers.BooleanField(required=False, default=False) # type: ignore
|
38 |
+
llm_ultimas_requests = serializers.CharField(
|
39 |
+
required=False, default="gemini-2.0-flash"
|
40 |
+
)
|
41 |
|
42 |
|
43 |
@dataclass
|
44 |
class GerarDocumentoInitialSerializerData:
|
45 |
files: List[dict]
|
|
|
46 |
user_message: str = ""
|
47 |
model: str = default_model
|
48 |
hf_embedding: str = "all-MiniLM-L6-v2"
|
49 |
chunk_size: int = 3500
|
50 |
chunk_overlap: int = 800
|
51 |
+
prompt_gerar_documento: str = ""
|
52 |
+
prompt_gerar_documento_etapa_2: Union[str, None] = None
|
53 |
+
prompt_gerar_documento_etapa_3: Union[str, None] = None
|
54 |
+
model_cohere_rerank: str = "rerank-english-v2.0"
|
55 |
+
claude_context_model: str = "claude-3-haiku-20240307"
|
56 |
+
should_have_contextual_chunks: bool = False
|
57 |
+
should_use_llama_parse: bool = False
|
58 |
+
llm_ultimas_requests: str = "gemini-2.0-flash"
|
59 |
|
60 |
|
61 |
class FileInfoSerializer(serializers.Serializer):
|
|
|
71 |
link_arquivo: str
|
72 |
|
73 |
|
74 |
+
class GerarDocumentoSerializer(
|
75 |
+
GerarDocumentoInitialSerializer, GerarDocumentoParametros
|
76 |
+
):
|
77 |
files = serializers.ListField(child=FileInfoSerializer(), required=True)
|
78 |
bubble_editor_version = serializers.CharField(
|
79 |
required=False, default="version-test"
|
80 |
) # Será o valor utilizado dentro da URL da requisição pro Bubble
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
doc_id = serializers.CharField(required=True)
|
82 |
form_response_id = serializers.CharField(required=True)
|
83 |
version = serializers.CharField(required=True)
|
|
|
87 |
|
88 |
|
89 |
@dataclass
|
90 |
+
class GerarDocumentoSerializerData(
|
91 |
+
GerarDocumentoParametrosData, GerarDocumentoInitialSerializerData
|
92 |
+
):
|
93 |
files: List[FileInfoSerializerData]
|
94 |
bubble_editor_version: str = "version-test"
|
95 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
doc_id: str = ""
|
97 |
form_response_id: str = ""
|
98 |
version: str = ""
|
99 |
|
100 |
|
101 |
+
class GerarDocumentoComPDFProprioSerializer(
|
102 |
+
GerarDocumentoInitialSerializer, GerarDocumentoParametros
|
103 |
+
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
def get_obj(self):
|
105 |
return GerarDocumentoSerializerData(**self.validated_data) # type: ignore
|
106 |
|
107 |
|
108 |
@dataclass
|
109 |
+
class GerarDocumentoComPDFProprioSerializerData(
|
110 |
+
GerarDocumentoParametrosData, GerarDocumentoInitialSerializerData
|
111 |
+
):
|
112 |
+
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
|
115 |
class GerarEmentaSerializer(serializers.Serializer):
|
gerar_documento/serializer_base.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass, field
|
2 |
+
from email.policy import default
|
3 |
+
from typing import List, Optional, Union
|
4 |
+
from rest_framework import serializers
|
5 |
+
from _utils.gerar_relatorio_modelo_usuario.prompts import (
|
6 |
+
prompt_gerar_documento,
|
7 |
+
prompt_auxiliar_padrao,
|
8 |
+
)
|
9 |
+
from setup.environment import default_model
|
10 |
+
from django.core.files.uploadedfile import UploadedFile
|
11 |
+
|
12 |
+
|
13 |
+
class GerarDocumentoParametros:
|
14 |
+
num_chunks_retrieval = serializers.IntegerField(default=20)
|
15 |
+
embedding_weight = serializers.FloatField(default=0.5)
|
16 |
+
bm25_weight = serializers.FloatField(default=0.5)
|
17 |
+
context_window = serializers.IntegerField(default=3)
|
18 |
+
num_k_rerank = serializers.IntegerField(default=20)
|
19 |
+
more_initial_chunks_for_reranking = serializers.IntegerField(default=100)
|
20 |
+
gpt_temperature = serializers.FloatField(default=0)
|
21 |
+
|
22 |
+
|
23 |
+
@dataclass
|
24 |
+
class GerarDocumentoParametrosData:
|
25 |
+
num_chunks_retrieval: int = 20
|
26 |
+
embedding_weight: float = 0.5
|
27 |
+
bm25_weight: float = 0.5
|
28 |
+
context_window: int = 3
|
29 |
+
num_k_rerank: int = 20
|
30 |
+
more_initial_chunks_for_reranking: int = 100
|
31 |
+
gpt_temperature: float = 0.0
|
gerar_documento/views.py
CHANGED
@@ -86,8 +86,9 @@ class GerarDocumentoComPDFProprioView(AsyncAPIView):
|
|
86 |
async def post(self, request):
|
87 |
self.axiom_instance.generate_new_uuid()
|
88 |
print(f"\n\nDATA E HORA DA REQUISIÇÃO: {datetime.now()}")
|
89 |
-
self.axiom_instance.send_axiom(
|
90 |
-
|
|
|
91 |
serializer = GerarDocumentoComPDFProprioSerializer(data=request.data)
|
92 |
|
93 |
if serializer.is_valid(raise_exception=True):
|
|
|
86 |
async def post(self, request):
|
87 |
self.axiom_instance.generate_new_uuid()
|
88 |
print(f"\n\nDATA E HORA DA REQUISIÇÃO: {datetime.now()}")
|
89 |
+
self.axiom_instance.send_axiom(
|
90 |
+
f"COMEÇOU NOVA REQUISIÇÃO - request.data: {request.data}"
|
91 |
+
)
|
92 |
serializer = GerarDocumentoComPDFProprioSerializer(data=request.data)
|
93 |
|
94 |
if serializer.is_valid(raise_exception=True):
|