File size: 4,686 Bytes
756fca0
7e48ec4
1286e81
2ce5e93
12d3e1a
1286e81
967a079
 
 
 
a263183
756fca0
1286e81
 
 
2ce5e93
bdf043b
 
967a079
bdf043b
 
 
 
967a079
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bdf043b
 
756fca0
 
 
 
 
 
 
 
967a079
 
 
 
 
 
 
 
756fca0
 
cb23311
 
 
 
 
 
756fca0
 
 
 
 
 
 
967a079
 
 
cb23311
 
 
 
095b5f1
 
 
7eb86f7
756fca0
 
 
 
 
967a079
 
 
756fca0
 
967a079
756fca0
 
 
 
a263183
967a079
 
 
756fca0
 
 
 
 
967a079
 
 
 
756fca0
a263183
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from dataclasses import dataclass, field
from typing import List, Optional, Union
from rest_framework import serializers
from _utils.gerar_documento_utils.prompts import (
    prompt_gerar_documento,
)
from gerar_documento.serializer_base import (
    GerarDocumentoParametros,
    GerarDocumentoParametrosData,
)
from setup.environment import default_model
from django.core.files.uploadedfile import UploadedFile

# Fallback question used as the default of
# GerarDocumentoInitialSerializer.user_message when a request omits that field.
# The serializer attribute has the same name; inside the class body the
# right-hand side still resolves to this module-level string.
user_message = "What are the main points of this document?"


class GerarDocumentoInitialSerializer(serializers.Serializer):
    """Base serializer for document-generation requests.

    Inherited by ``GerarDocumentoSerializer`` and
    ``GerarDocumentoComPDFProprioSerializer``. Only ``files`` is mandatory;
    every other field is optional.
    """

    # List of uploads; each item is validated by a FileField.
    files = serializers.ListField(
        child=serializers.FileField(),
        required=True,
    )

    # On the right-hand side `user_message` is the module-level default
    # string: the class attribute of the same name is not bound yet.
    user_message = serializers.CharField(required=False, default=user_message)
    model = serializers.CharField(required=False, default=default_model)
    hf_embedding = serializers.CharField(
        required=False,
        default="all-MiniLM-L6-v2",
    )
    chunk_size = serializers.IntegerField(required=False, default=3500)
    chunk_overlap = serializers.IntegerField(required=False, default=800)

    # Same shadowing as above: the default is the imported
    # `prompt_gerar_documento` value.
    prompt_gerar_documento = serializers.CharField(
        required=False,
        default=prompt_gerar_documento,
    )
    # Optional with no default declared.
    prompt_gerar_documento_etapa_2 = serializers.CharField(required=False)
    prompt_gerar_documento_etapa_3 = serializers.CharField(required=False)

    model_cohere_rerank = serializers.CharField(
        required=False,
        default="rerank-english-v2.0",
    )
    claude_context_model = serializers.CharField(
        required=False,
        default="claude-3-haiku-20240307",
    )

    # `required=False` is spelled out for consistency with the sibling flag;
    # DRF already treats a field with a default as not required.
    should_have_contextual_chunks = serializers.BooleanField(required=False, default=False)  # type: ignore
    should_use_llama_parse = serializers.BooleanField(required=False, default=False)  # type: ignore

    llm_ultimas_requests = serializers.CharField(
        required=False,
        default="gemini-2.0-flash",
    )


@dataclass
class GerarDocumentoInitialSerializerData:
    """Dataclass mirror of the fields of ``GerarDocumentoInitialSerializer``.

    ``files`` is the only field without a default.

    NOTE(review): ``user_message`` and ``prompt_gerar_documento`` default to
    an empty string here, while the serializer declares non-empty defaults
    for them -- confirm the difference is intentional.
    """

    # NOTE(review): annotated as a list of dicts although the serializer
    # declares FileField children -- confirm the intended element type.
    files: List[dict]

    user_message: str = ""
    model: str = default_model
    hf_embedding: str = "all-MiniLM-L6-v2"

    chunk_size: int = 3500
    chunk_overlap: int = 800

    prompt_gerar_documento: str = ""
    # The two stage prompts may be left unset.
    prompt_gerar_documento_etapa_2: Optional[str] = None
    prompt_gerar_documento_etapa_3: Optional[str] = None

    model_cohere_rerank: str = "rerank-english-v2.0"
    claude_context_model: str = "claude-3-haiku-20240307"

    should_have_contextual_chunks: bool = False
    should_use_llama_parse: bool = False

    llm_ultimas_requests: str = "gemini-2.0-flash"


class FileInfoSerializer(serializers.Serializer):
    """Validate one file reference made of an id, a type and a URL."""

    # Identifier string, at most 255 characters.
    unique_id = serializers.CharField(max_length=255)
    # File type ("tipo_arquivo") string, at most 255 characters.
    tipo_arquivo = serializers.CharField(max_length=255)
    # File link ("link_arquivo"); must be a valid URL.
    link_arquivo = serializers.URLField()


@dataclass
class FileInfoSerializerData:
    """Dataclass mirror of ``FileInfoSerializer``; all three fields are required."""

    # Identifier of the file.
    unique_id: str
    # File type ("tipo_arquivo").
    tipo_arquivo: str
    # Link to the file ("link_arquivo").
    link_arquivo: str


class GerarDocumentoSerializer(
    GerarDocumentoInitialSerializer,
    GerarDocumentoParametros,
):
    """Document-generation request whose files are passed as file references.

    Adds the document identifiers (``doc_id``, ``form_response_id``,
    ``version``) and the Bubble editor version to the inherited fields.
    """

    # Replaces the inherited upload list: each entry is validated by
    # FileInfoSerializer instead of FileField.
    files = serializers.ListField(child=FileInfoSerializer(), required=True)

    # This value is used inside the URL of the request sent to Bubble.
    bubble_editor_version = serializers.CharField(
        required=False,
        default="version-test",
    )

    doc_id = serializers.CharField(required=True)
    form_response_id = serializers.CharField(required=True)
    version = serializers.CharField(required=True)

    def get_obj(self):
        """Return the validated payload as a ``GerarDocumentoSerializerData``.

        Reads ``self.validated_data``, so it must be called after a
        successful ``is_valid()``.
        """
        validated = self.validated_data
        return GerarDocumentoSerializerData(**validated)  # type: ignore


@dataclass
class GerarDocumentoSerializerData(
    GerarDocumentoParametrosData, GerarDocumentoInitialSerializerData
):
    """Dataclass built by ``GerarDocumentoSerializer.get_obj``.

    Extends both parent dataclasses with the Bubble editor version and the
    document identifiers.
    """

    # Re-annotates the inherited `files` field.
    # NOTE(review): get_obj passes validated_data straight through, so the
    # items are presumably the mappings produced by FileInfoSerializer rather
    # than FileInfoSerializerData instances -- confirm against callers.
    files: List[FileInfoSerializerData]
    bubble_editor_version: str = "version-test"

    # The serializer marks these three as required; the empty-string defaults
    # here only apply when the dataclass is constructed directly.
    doc_id: str = ""
    form_response_id: str = ""
    version: str = ""


class GerarDocumentoComPDFProprioSerializer(
    GerarDocumentoInitialSerializer,
    GerarDocumentoParametros,
):
    """Document-generation request that declares no fields beyond its bases."""

    def get_obj(self):
        """Return the validated payload as a ``GerarDocumentoSerializerData``.

        Reads ``self.validated_data``, so it must be called after a
        successful ``is_valid()``.

        NOTE(review): this builds ``GerarDocumentoSerializerData`` rather than
        ``GerarDocumentoComPDFProprioSerializerData``. Callers may rely on the
        extra defaulted attributes of the former (``bubble_editor_version``,
        ``doc_id``, ``form_response_id``, ``version``), so the behaviour is
        kept as is -- confirm which class is intended.
        """
        validated = self.validated_data
        return GerarDocumentoSerializerData(**validated)  # type: ignore


@dataclass
class GerarDocumentoComPDFProprioSerializerData(
    GerarDocumentoParametrosData,
    GerarDocumentoInitialSerializerData,
):
    """Combine both parent dataclasses without adding any field."""


class GerarEmentaSerializer(serializers.Serializer):
    """Request payload for the "ementa" generation endpoint.

    Standalone serializer (it does not inherit from
    GerarDocumentoInitialSerializer) that takes file references, chunking
    parameters and the document identifiers.
    """

    # File references, each validated by FileInfoSerializer.
    files = serializers.ListField(child=FileInfoSerializer(), required=True)
    # Defaults to an empty string here, unlike the module-level default
    # question used by GerarDocumentoInitialSerializer.
    user_message = serializers.CharField(required=False, default="")
    chunk_size = serializers.IntegerField(required=False, default=3500)
    chunk_overlap = serializers.IntegerField(required=False, default=800)
    bubble_editor_version = serializers.CharField(
        required=False, default="version-test"
    )  # This value is used inside the URL of the request sent to Bubble
    doc_id = serializers.CharField(required=True)
    form_response_id = serializers.CharField(required=True)
    version = serializers.CharField(required=True)