manuelcozar55 committed on
Commit
a80aa7b
verified
1 Parent(s): 557e7b4

Update app.py

Files changed (1)
  1. app.py +46 -20
app.py CHANGED
@@ -14,7 +14,7 @@ import os
 
 login(token=os.getenv('HUGGINGFACE_TOKEN'))
 
-# Configuración del modelo de resumen
+# Configuración del modelo LLM
 llm = HuggingFaceEndpoint(
     repo_id="mistralai/Mistral-7B-Instruct-v0.3",
     task="text-generation",
@@ -37,7 +37,7 @@ def read_pdf(file_path):
         text += pdf_reader.pages[page].extract_text()
     return text
 
-def summarize(file):
+def summarize(file, summary_length):
     # Leer el contenido del archivo subido
     file_path = file.name
     if file_path.endswith('.pdf'):
@@ -45,14 +45,21 @@ def summarize(file):
     else:
         with open(file_path, 'r', encoding='utf-8') as f:
             text = f.read()
-
-    template = '''
-Please carefully read the following document:
+
+    if summary_length == 'Corto':
+        length_instruction = "El resumen debe tener un máximo de 5 puntos."
+    elif summary_length == 'Medio':
+        length_instruction = "El resumen debe tener un máximo de 10 puntos."
+    else:
+        length_instruction = "El resumen debe tener un máximo de 15 puntos."
+
+    template = f'''
+Por favor, lea detenidamente el siguiente documento:
 <document>
-{TEXT}
+{{TEXT}}
 </document>
-After reading through the document, identify the key points and main ideas covered in the text. Organize these key points into a concise bulleted list that summarizes the essential information from the document. The summary should have a maximum of 10 bullet points.
-Your goal is to be comprehensive in capturing the core content of the document, while also being concise in how you express each summary point. Omit minor details and focus on the central themes and important facts.
+Después de leer el documento, identifique los puntos clave y las ideas principales cubiertas en el texto. Organice estos puntos clave en una lista con viñetas concisa que resuma la información esencial del documento. {length_instruction}
+Su objetivo es ser exhaustivo en la captura del contenido central del documento, mientras que también es conciso en la expresión de cada punto del resumen. Omita los detalles menores y concéntrese en los temas centrales y hechos importantes.
 '''
 
     prompt = PromptTemplate(
@@ -63,7 +70,7 @@ Your goal is to be comprehensive in capturing the core content of the document,
     formatted_prompt = prompt.format(TEXT=text)
    output_summary = llm_engine_hf.invoke(formatted_prompt)
 
-    return output_summary.content
+    return f"Prompt:\n{formatted_prompt}\n\nResumen:\n{output_summary.content}"
 
 def classify_text(text):
     inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True, padding="max_length")
@@ -85,11 +92,11 @@ def translate(file, target_language):
             text = f.read()
 
     template = '''
-Please translate the following document to {LANGUAGE}:
+Por favor, traduzca el siguiente documento al {LANGUAGE}:
 <document>
 {TEXT}
 </document>
-Ensure that the translation is accurate and preserves the original meaning of the document.
+Asegúrese de que la traducción sea precisa y conserve el significado original del documento.
 '''
 
     prompt = PromptTemplate(
@@ -100,11 +107,11 @@ Ensure that the translation is accurate and preserves the original meaning of th
     formatted_prompt = prompt.format(TEXT=text, LANGUAGE=target_language)
     translated_text = llm_engine_hf.invoke(formatted_prompt)
 
-    return translated_text.content
+    return f"Prompt:\n{formatted_prompt}\n\nTraducción:\n{translated_text.content}"
 
-def process_file(file, action, target_language=None):
+def process_file(file, action, target_language=None, summary_length=None):
     if action == "Resumen":
-        return summarize(file)
+        return summarize(file, summary_length)
     elif action == "Clasificar":
         file_path = file.name
         if file_path.endswith('.pdf'):
@@ -118,6 +125,15 @@ def process_file(file, action, target_language=None):
     else:
         return "Acción no válida"
 
+def answer_question(text, question):
+    messages = [
+        {"role": "system", "content": "Eres un asistente útil."},
+        {"role": "user", "content": f"El documento es el siguiente:\n{text}"},
+        {"role": "user", "content": question}
+    ]
+    response = llm_engine_hf.invoke(messages)
+    return response.content
+
 def download_text(output_text, filename='output.txt'):
     if output_text:
         file_path = Path(filename)
@@ -133,27 +149,34 @@ def create_download_file(output_text, filename='output.txt'):
 
 # Crear la interfaz de Gradio
 with gr.Blocks() as demo:
-    gr.Markdown("## Document Processor")
+    gr.Markdown("## Procesador de Documentos")
 
     with gr.Row():
         with gr.Column():
             file = gr.File(label="Subir un archivo")
             action = gr.Radio(label="Seleccione una acción", choices=["Resumen", "Clasificar", "Traducir"])
             target_language = gr.Dropdown(label="Seleccionar idioma de traducción", choices=["en", "fr", "de"], visible=False)
+            summary_length = gr.Radio(label="Seleccione la longitud del resumen", choices=["Corto", "Medio", "Largo"], visible=False)
 
         with gr.Column():
             output_text = gr.Textbox(label="Resultado", lines=20)
+            question = gr.Textbox(label="Hacer una pregunta al documento", lines=2, visible=False)
+            answer = gr.Textbox(label="Respuesta", lines=2, interactive=False, visible=False)
 
-    def update_language_dropdown(action):
+    def update_visible_elements(action):
         if action == "Traducir":
-            return gr.update(visible=True)
+            return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+        elif action == "Resumen":
+            return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+        elif action == "Clasificar":
+            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
         else:
-            return gr.update(visible=False)
+            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
 
-    action.change(update_language_dropdown, inputs=action, outputs=target_language)
+    action.change(update_visible_elements, inputs=action, outputs=[target_language, summary_length, question, output_text, answer])
 
     submit_button = gr.Button("Procesar")
-    submit_button.click(process_file, inputs=[file, action, target_language], outputs=output_text)
+    submit_button.click(process_file, inputs=[file, action, target_language, summary_length], outputs=output_text)
 
     def generate_file():
         summary_text = output_text.value
@@ -168,5 +191,8 @@ with gr.Blocks() as demo:
         outputs=gr.File()
     )
 
+    question_button = gr.Button("Hacer Pregunta")
+    question_button.click(answer_question, inputs=[output_text, question], outputs=answer)
+
 # Ejecutar la aplicación Gradio
 demo.launch(share=True)
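
One detail worth noting in the new summarize(): the template is now an f-string, so {length_instruction} is interpolated immediately, while the doubled braces in {{TEXT}} survive as a literal {TEXT} placeholder that prompt.format(TEXT=text) fills in later. A minimal standalone sketch of that behavior (not part of the commit; plain str.format stands in here for the app's PromptTemplate):

# Sketch: f-string interpolation vs. the {TEXT} placeholder used by the prompt.
summary_length = 'Corto'
if summary_length == 'Corto':
    length_instruction = "El resumen debe tener un máximo de 5 puntos."
elif summary_length == 'Medio':
    length_instruction = "El resumen debe tener un máximo de 10 puntos."
else:
    length_instruction = "El resumen debe tener un máximo de 15 puntos."

# {length_instruction} is substituted now; {{TEXT}} becomes a literal {TEXT}.
template = f'''<document>
{{TEXT}}
</document>
{length_instruction}'''

print(template)                                          # {TEXT} is still unresolved here
print(template.format(TEXT="contenido del documento"))   # analogous to prompt.format(TEXT=text) in the app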