luanpoppe committed on
Commit
bdf043b
·
1 Parent(s): a263183

feat: removendo arquivos e pastas antigas

Browse files
_antigos/__init__.py DELETED
File without changes
_antigos/pdfs/__init__.py DELETED
File without changes
_antigos/pdfs/admin.py DELETED
@@ -1,7 +0,0 @@
1
- from django.contrib import admin
2
-
3
- # from pdfs.models import PDFsModel
4
-
5
- # Register your models here.
6
-
7
- # admin.site.register(PDFsModel)
 
 
 
 
 
 
 
 
_antigos/pdfs/apps.py DELETED
@@ -1,6 +0,0 @@
1
- from django.apps import AppConfig
2
-
3
-
4
- class PdfsConfig(AppConfig):
5
- default_auto_field = "django.db.models.BigAutoField"
6
- name = "pdfs"
 
 
 
 
 
 
 
_antigos/pdfs/migrations/0001_initial.py DELETED
@@ -1,21 +0,0 @@
1
- # Generated by Django 4.1 on 2024-11-09 22:42
2
-
3
- from django.db import migrations, models
4
-
5
-
6
- class Migration(migrations.Migration):
7
-
8
- initial = True
9
-
10
- dependencies = [
11
- ]
12
-
13
- operations = [
14
- migrations.CreateModel(
15
- name='EndpointTesteModel',
16
- fields=[
17
- ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
18
- ('teste', models.CharField(max_length=300)),
19
- ],
20
- ),
21
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
_antigos/pdfs/migrations/0002_delete_endpointtestemodel.py DELETED
@@ -1,16 +0,0 @@
1
- # Generated by Django 4.1 on 2024-11-16 00:46
2
-
3
- from django.db import migrations
4
-
5
-
6
- class Migration(migrations.Migration):
7
-
8
- dependencies = [
9
- ('pdfs', '0001_initial'),
10
- ]
11
-
12
- operations = [
13
- migrations.DeleteModel(
14
- name='EndpointTesteModel',
15
- ),
16
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
_antigos/pdfs/migrations/__init__.py DELETED
File without changes
_antigos/pdfs/models.py DELETED
@@ -1,4 +0,0 @@
1
- from django.db import models
2
-
3
- # Create your models here.
4
- # class PDFsModel(models.Model):
 
 
 
 
 
_antigos/pdfs/serializer.py DELETED
@@ -1,8 +0,0 @@
1
- from rest_framework import serializers
2
-
3
- class PDFUploadSerializer(serializers.Serializer):
4
- files = serializers.ListField(child=serializers.FileField(), required=True)
5
- system_prompt = serializers.CharField(required=True)
6
- user_message = serializers.CharField(required=True)
7
- model = serializers.CharField(required=False)
8
- embedding = serializers.CharField(required=False)
 
 
 
 
 
 
 
 
 
_antigos/pdfs/tests.py DELETED
@@ -1,3 +0,0 @@
1
- from django.test import TestCase
2
-
3
- # Create your tests here.
 
 
 
 
_antigos/pdfs/views.py DELETED
@@ -1,52 +0,0 @@
1
- import tempfile, os
2
- from pdfs.serializer import PDFUploadSerializer
3
- from setup.environment import default_model
4
- from drf_spectacular.utils import extend_schema
5
-
6
- from rest_framework.decorators import api_view, parser_classes
7
- from rest_framework.parsers import MultiPartParser
8
- from rest_framework.response import Response
9
-
10
- from _utils.main import get_llm_answer
11
-
12
- @extend_schema(
13
- request=PDFUploadSerializer,
14
- )
15
- @api_view(["POST"])
16
- @parser_classes([MultiPartParser])
17
- def getPDF(request):
18
- if request.method == "POST":
19
- serializer = PDFUploadSerializer(data=request.data)
20
- if serializer.is_valid(raise_exception=True):
21
- listaPDFs = []
22
- print('\n\n')
23
- data = request.data
24
- print('data: ', data)
25
- embedding = serializer.validated_data.get("embedding", "gpt")
26
- model = serializer.validated_data.get("model", default_model)
27
-
28
- # pdf_file = serializer.validated_data['file']
29
- for file in serializer.validated_data['files']:
30
- print("file: ", file)
31
- file.seek(0)
32
- # Create a temporary file to save the uploaded PDF
33
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
34
- # Write the uploaded file content to the temporary file
35
- for chunk in file.chunks():
36
- temp_file.write(chunk)
37
- temp_file_path = temp_file.name # Get the path of the temporary file
38
- listaPDFs.append(temp_file_path)
39
- # print('temp_file_path: ', temp_file_path)
40
- print('listaPDFs: ', listaPDFs)
41
-
42
- resposta_llm = None
43
- # resposta_llm = get_llm_answer(data["system_prompt"], data["user_message"], temp_file_path, model=model, embedding=embedding)
44
- resposta_llm = get_llm_answer(data["system_prompt"], data["user_message"], listaPDFs, model=model, embedding=embedding)
45
-
46
- for file in listaPDFs:
47
- os.remove(file)
48
- # os.remove(temp_file_path)
49
-
50
- return Response({
51
- "Resposta": resposta_llm
52
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
_antigos/resumos/__init__.py DELETED
File without changes
_antigos/resumos/admin.py DELETED
@@ -1,3 +0,0 @@
1
- from django.contrib import admin
2
-
3
- # Register your models here.
 
 
 
 
_antigos/resumos/apps.py DELETED
@@ -1,6 +0,0 @@
1
- from django.apps import AppConfig
2
-
3
-
4
- class ResumosConfig(AppConfig):
5
- default_auto_field = 'django.db.models.BigAutoField'
6
- name = 'resumos'
 
 
 
 
 
 
 
_antigos/resumos/migrations/__init__.py DELETED
File without changes
_antigos/resumos/models.py DELETED
@@ -1,3 +0,0 @@
1
- from django.db import models
2
-
3
- # Create your models here.
 
 
 
 
_antigos/resumos/serializer.py DELETED
@@ -1,29 +0,0 @@
1
- from rest_framework import serializers
2
- from setup.environment import default_model
3
- # from _utils.utils import DEFAULT_SYSTEM_PROMPT
4
-
5
- prompt_template = """
6
- Based on the following context, provide multiple key points from the document.
7
- For each point, create a new paragraph.
8
- Each paragraph should be a complete, self-contained insight.
9
-
10
- Context: {context}
11
-
12
- Key points:
13
- """
14
-
15
- class ResumoPDFSerializer(serializers.Serializer):
16
- files = serializers.ListField(child=serializers.FileField(), required=True)
17
- system_prompt = serializers.CharField(required=False)
18
- user_message = serializers.CharField(required=False, default="")
19
- model = serializers.CharField(required=False)
20
- iterative_refinement = serializers.BooleanField(required=False, default=False) # type: ignore
21
-
22
- class ResumoCursorSerializer(serializers.Serializer):
23
- files = serializers.ListField(child=serializers.FileField(), required=True)
24
- system_prompt = serializers.CharField(required=False, default=prompt_template)
25
- user_message = serializers.CharField(required=False, default="")
26
- model = serializers.CharField(required=False, default=default_model)
27
- hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
28
- chunk_size = serializers.IntegerField(required=False, default=3500)
29
- chunk_overlap = serializers.IntegerField(required=False, default=800)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
_antigos/resumos/tests.py DELETED
@@ -1,3 +0,0 @@
1
- from django.test import TestCase
2
-
3
- # Create your tests here.
 
 
 
 
_antigos/resumos/views.py DELETED
@@ -1,144 +0,0 @@
1
- from rest_framework.views import APIView
2
- import tempfile, os
3
- from rest_framework.response import Response
4
- from _utils.resumo_simples_cursor import get_llm_summary_answer_by_cursor
5
- from _utils.utils import DEFAULT_SYSTEM_PROMPT
6
- from .serializer import (
7
- ResumoPDFSerializer,
8
- ResumoCursorSerializer,
9
- )
10
- from _utils.main import get_llm_answer_summary, get_llm_answer_summary_with_embedding
11
- from setup.environment import default_model
12
- from rest_framework.parsers import MultiPartParser
13
- from drf_spectacular.utils import extend_schema
14
-
15
-
16
- class ResumoView(APIView):
17
- parser_classes = [MultiPartParser]
18
-
19
- @extend_schema(
20
- request=ResumoPDFSerializer,
21
- )
22
- def post(self, request):
23
- serializer = ResumoPDFSerializer(data=request.data)
24
- if serializer.is_valid(raise_exception=True):
25
- listaPDFs = []
26
- data = serializer.validated_data
27
- model = serializer.validated_data.get("model", default_model)
28
- print("serializer.validated_data: ", serializer.validated_data)
29
-
30
- for file in serializer.validated_data["files"]:
31
- print("file: ", file)
32
- file.seek(0)
33
- with tempfile.NamedTemporaryFile(
34
- delete=False, suffix=".pdf"
35
- ) as temp_file: # Create a temporary file to save the uploaded PDF
36
- for (
37
- chunk
38
- ) in (
39
- file.chunks()
40
- ): # Write the uploaded file content to the temporary file
41
- temp_file.write(chunk)
42
- temp_file_path = (
43
- temp_file.name
44
- ) # Get the path of the temporary file
45
- listaPDFs.append(temp_file_path)
46
- # print('listaPDFs: ', listaPDFs)
47
-
48
- system_prompt = data.get("system_prompt", DEFAULT_SYSTEM_PROMPT)
49
- resposta_llm = get_llm_answer_summary(
50
- system_prompt,
51
- data["user_message"],
52
- listaPDFs,
53
- model=model,
54
- isIterativeRefinement=data["iterative_refinement"],
55
- )
56
-
57
- for file in listaPDFs:
58
- os.remove(file)
59
-
60
- return Response({"resposta": resposta_llm})
61
-
62
-
63
- class ResumoEmbeddingView(APIView):
64
- parser_classes = [MultiPartParser]
65
-
66
- @extend_schema(
67
- request=ResumoPDFSerializer,
68
- )
69
- def post(self, request):
70
- serializer = ResumoPDFSerializer(data=request.data)
71
- if serializer.is_valid(raise_exception=True):
72
- listaPDFs = []
73
- data = serializer.validated_data
74
- model = serializer.validated_data.get("model", default_model)
75
- print("serializer.validated_data: ", serializer.validated_data)
76
-
77
- for file in serializer.validated_data["files"]:
78
- file.seek(0)
79
- with tempfile.NamedTemporaryFile(
80
- delete=False, suffix=".pdf"
81
- ) as temp_file: # Create a temporary file to save the uploaded PDF
82
- for (
83
- chunk
84
- ) in (
85
- file.chunks()
86
- ): # Write the uploaded file content to the temporary file
87
- temp_file.write(chunk)
88
- temp_file_path = (
89
- temp_file.name
90
- ) # Get the path of the temporary file
91
- listaPDFs.append(temp_file_path)
92
- print("listaPDFs: ", listaPDFs)
93
-
94
- system_prompt = data.get("system_prompt", DEFAULT_SYSTEM_PROMPT)
95
- resposta_llm = get_llm_answer_summary_with_embedding(
96
- system_prompt,
97
- data["user_message"],
98
- listaPDFs,
99
- model=model,
100
- isIterativeRefinement=data["iterative_refinement"],
101
- )
102
-
103
- for file in listaPDFs:
104
- os.remove(file)
105
-
106
- return Response({"resposta": resposta_llm})
107
-
108
-
109
- class ResumoSimplesCursorView(APIView):
110
- parser_classes = [MultiPartParser]
111
-
112
- @extend_schema(
113
- request=ResumoCursorSerializer,
114
- )
115
- def post(self, request):
116
- serializer = ResumoCursorSerializer(data=request.data)
117
- if serializer.is_valid(raise_exception=True):
118
- listaPDFs = []
119
- data = serializer.validated_data
120
- print("\nserializer.validated_data: ", serializer.validated_data)
121
-
122
- for file in serializer.validated_data["files"]:
123
- file.seek(0)
124
- with tempfile.NamedTemporaryFile(
125
- delete=False, suffix=".pdf"
126
- ) as temp_file: # Create a temporary file to save the uploaded PDF
127
- for (
128
- chunk
129
- ) in (
130
- file.chunks()
131
- ): # Write the uploaded file content to the temporary file
132
- temp_file.write(chunk)
133
- temp_file_path = (
134
- temp_file.name
135
- ) # Get the path of the temporary file
136
- listaPDFs.append(temp_file_path)
137
- print("listaPDFs: ", listaPDFs)
138
-
139
- resposta_llm = get_llm_summary_answer_by_cursor(data, listaPDFs)
140
-
141
- for file in listaPDFs:
142
- os.remove(file)
143
-
144
- return Response({"resposta": resposta_llm})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
gerar_documento/serializer.py CHANGED
@@ -1,5 +1,4 @@
1
  from rest_framework import serializers
2
- from _antigos.resumos.serializer import ResumoCursorSerializer
3
  from _utils.gerar_relatorio_modelo_usuario.prompts import (
4
  prompt_gerar_documento,
5
  prompt_auxiliar_padrao,
@@ -9,13 +8,34 @@ from setup.environment import default_model
9
  user_message = "What are the main points of this document?"
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  class FileInfoSerializer(serializers.Serializer):
13
  unique_id = serializers.CharField(max_length=255)
14
  tipo_arquivo = serializers.CharField(max_length=255)
15
  link_arquivo = serializers.URLField()
16
 
17
 
18
- class GerarDocumentoSerializer(ResumoCursorSerializer):
19
  system_prompt = None
20
 
21
  files = serializers.ListField(child=FileInfoSerializer(), required=True)
@@ -55,7 +75,7 @@ class GerarDocumentoSerializer(ResumoCursorSerializer):
55
  version = serializers.CharField(required=True)
56
 
57
 
58
- class GerarDocumentoComPDFProprioSerializer(ResumoCursorSerializer):
59
  system_prompt = None
60
  # prompt_auxiliar = serializers.CharField(
61
  # required=False, default=prompt_auxiliar_padrao
 
1
  from rest_framework import serializers
 
2
  from _utils.gerar_relatorio_modelo_usuario.prompts import (
3
  prompt_gerar_documento,
4
  prompt_auxiliar_padrao,
 
8
  user_message = "What are the main points of this document?"
9
 
10
 
11
+ prompt_template = """
12
+ Based on the following context, provide multiple key points from the document.
13
+ For each point, create a new paragraph.
14
+ Each paragraph should be a complete, self-contained insight.
15
+
16
+ Context: {context}
17
+
18
+ Key points:
19
+ """
20
+
21
+
22
+ class GerarDocumentoInitialSerializer(serializers.Serializer):
23
+ files = serializers.ListField(child=serializers.FileField(), required=True)
24
+ system_prompt = serializers.CharField(required=False, default=prompt_template)
25
+ user_message = serializers.CharField(required=False, default="")
26
+ model = serializers.CharField(required=False, default=default_model)
27
+ hf_embedding = serializers.CharField(required=False, default="all-MiniLM-L6-v2")
28
+ chunk_size = serializers.IntegerField(required=False, default=3500)
29
+ chunk_overlap = serializers.IntegerField(required=False, default=800)
30
+
31
+
32
  class FileInfoSerializer(serializers.Serializer):
33
  unique_id = serializers.CharField(max_length=255)
34
  tipo_arquivo = serializers.CharField(max_length=255)
35
  link_arquivo = serializers.URLField()
36
 
37
 
38
+ class GerarDocumentoSerializer(GerarDocumentoInitialSerializer):
39
  system_prompt = None
40
 
41
  files = serializers.ListField(child=FileInfoSerializer(), required=True)
 
75
  version = serializers.CharField(required=True)
76
 
77
 
78
+ class GerarDocumentoComPDFProprioSerializer(GerarDocumentoInitialSerializer):
79
  system_prompt = None
80
  # prompt_auxiliar = serializers.CharField(
81
  # required=False, default=prompt_auxiliar_padrao
setup/urls.py CHANGED
@@ -2,9 +2,6 @@ from django.contrib import admin
2
  from django.urls import path, include
3
  from rest_framework import routers
4
  from drf_spectacular.views import SpectacularSwaggerView, SpectacularAPIView
5
- from _antigos.resumos.views import (
6
- ResumoSimplesCursorView,
7
- ) # Não sei por quê, mas se remover esta importação, o endpoint de gerar_documentos para de funcionar
8
 
9
  router = routers.DefaultRouter()
10
  # router.register("endpoint-teste", EndpointTesteViewSet, basename="Basename do endpoint-teste")
 
2
  from django.urls import path, include
3
  from rest_framework import routers
4
  from drf_spectacular.views import SpectacularSwaggerView, SpectacularAPIView
 
 
 
5
 
6
  router = routers.DefaultRouter()
7
  # router.register("endpoint-teste", EndpointTesteViewSet, basename="Basename do endpoint-teste")