Spaces:

manuelcozar55
/

LexAIcon_Mistral7B

Paused

App Files Files Community

manuelcozar55 committed on Jun 21, 2024

Commit

a80aa7b

verified ·

1 Parent(s): 557e7b4

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -20

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ import os
 login(token=os.getenv('HUGGINGFACE_TOKEN'))
-# Configuración del modelo de resumen
 llm = HuggingFaceEndpoint(
     repo_id="mistralai/Mistral-7B-Instruct-v0.3",
     task="text-generation",
@@ -37,7 +37,7 @@ def read_pdf(file_path):
         text += pdf_reader.pages[page].extract_text()
     return text
-def summarize(file):
     # Leer el contenido del archivo subido
     file_path = file.name
     if file_path.endswith('.pdf'):
@@ -45,14 +45,21 @@ def summarize(file):
     else:
         with open(file_path, 'r', encoding='utf-8') as f:
             text = f.read()
-    template = '''
-    Please carefully read the following document:
 <document>
-{TEXT}
 </document>
-After reading through the document, identify the key points and main ideas covered in the text. Organize these key points into a concise bulleted list that summarizes the essential information from the document. The summary should have a maximum of 10 bullet points.
-Your goal is to be comprehensive in capturing the core content of the document, while also being concise in how you express each summary point. Omit minor details and focus on the central themes and important facts.
     '''
     prompt = PromptTemplate(
@@ -63,7 +70,7 @@ Your goal is to be comprehensive in capturing the core content of the document,
     formatted_prompt = prompt.format(TEXT=text)
     output_summary = llm_engine_hf.invoke(formatted_prompt)
-    return output_summary.content
 def classify_text(text):
     inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True, padding="max_length")
@@ -85,11 +92,11 @@ def translate(file, target_language):
             text = f.read()
     template = '''
-    Please translate the following document to {LANGUAGE}:
 <document>
 {TEXT}
 </document>
-Ensure that the translation is accurate and preserves the original meaning of the document.
     '''
     prompt = PromptTemplate(
@@ -100,11 +107,11 @@ Ensure that the translation is accurate and preserves the original meaning of th
     formatted_prompt = prompt.format(TEXT=text, LANGUAGE=target_language)
     translated_text = llm_engine_hf.invoke(formatted_prompt)
-    return translated_text.content
-def process_file(file, action, target_language=None):
     if action == "Resumen":
-        return summarize(file)
     elif action == "Clasificar":
         file_path = file.name
         if file_path.endswith('.pdf'):
@@ -118,6 +125,15 @@ def process_file(file, action, target_language=None):
     else:
         return "Acción no válida"
 def download_text(output_text, filename='output.txt'):
     if output_text:
         file_path = Path(filename)
@@ -133,27 +149,34 @@ def create_download_file(output_text, filename='output.txt'):
 # Crear la interfaz de Gradio
 with gr.Blocks() as demo:
-    gr.Markdown("## Document Processor")
     with gr.Row():
         with gr.Column():
             file = gr.File(label="Subir un archivo")
             action = gr.Radio(label="Seleccione una acción", choices=["Resumen", "Clasificar", "Traducir"])
             target_language = gr.Dropdown(label="Seleccionar idioma de traducción", choices=["en", "fr", "de"], visible=False)
         with gr.Column():
             output_text = gr.Textbox(label="Resultado", lines=20)
-    def update_language_dropdown(action):
         if action == "Traducir":
-            return gr.update(visible=True)
         else:
-            return gr.update(visible=False)
-    action.change(update_language_dropdown, inputs=action, outputs=target_language)
     submit_button = gr.Button("Procesar")
-    submit_button.click(process_file, inputs=[file, action, target_language], outputs=output_text)
     def generate_file():
         summary_text = output_text.value
@@ -168,5 +191,8 @@ with gr.Blocks() as demo:
         outputs=gr.File()
     )
 # Ejecutar la aplicación Gradio
 demo.launch(share=True)

 login(token=os.getenv('HUGGINGFACE_TOKEN'))
+# Configuración del modelo LLM
 llm = HuggingFaceEndpoint(
     repo_id="mistralai/Mistral-7B-Instruct-v0.3",
     task="text-generation",
         text += pdf_reader.pages[page].extract_text()
     return text
+def summarize(file, summary_length):
     # Leer el contenido del archivo subido
     file_path = file.name
     if file_path.endswith('.pdf'):
     else:
         with open(file_path, 'r', encoding='utf-8') as f:
             text = f.read()
+    if summary_length == 'Corto':
+        length_instruction = "El resumen debe tener un máximo de 5 puntos."
+    elif summary_length == 'Medio':
+        length_instruction = "El resumen debe tener un máximo de 10 puntos."
+    else:
+        length_instruction = "El resumen debe tener un máximo de 15 puntos."
+    template = f'''
+    Por favor, lea detenidamente el siguiente documento:
 <document>
+{{TEXT}}
 </document>
+Después de leer el documento, identifique los puntos clave y las ideas principales cubiertas en el texto. Organice estos puntos clave en una lista con viñetas concisa que resuma la información esencial del documento. {length_instruction}
+Su objetivo es ser exhaustivo en la captura del contenido central del documento, mientras que también es conciso en la expresión de cada punto del resumen. Omita los detalles menores y concéntrese en los temas centrales y hechos importantes.
     '''
     prompt = PromptTemplate(
     formatted_prompt = prompt.format(TEXT=text)
     output_summary = llm_engine_hf.invoke(formatted_prompt)
+    return f"Prompt:\n{formatted_prompt}\n\nResumen:\n{output_summary.content}"
 def classify_text(text):
     inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True, padding="max_length")
             text = f.read()
     template = '''
+    Por favor, traduzca el siguiente documento al {LANGUAGE}:
 <document>
 {TEXT}
 </document>
+Asegúrese de que la traducción sea precisa y conserve el significado original del documento.
     '''
     prompt = PromptTemplate(
     formatted_prompt = prompt.format(TEXT=text, LANGUAGE=target_language)
     translated_text = llm_engine_hf.invoke(formatted_prompt)
+    return f"Prompt:\n{formatted_prompt}\n\nTraducción:\n{translated_text.content}"
+def process_file(file, action, target_language=None, summary_length=None):
     if action == "Resumen":
+        return summarize(file, summary_length)
     elif action == "Clasificar":
         file_path = file.name
         if file_path.endswith('.pdf'):
     else:
         return "Acción no válida"
+def answer_question(text, question):
+    """Ask the LLM a question about a piece of text.
+
+    Args:
+        text: Text given to the model as the document context.
+            NOTE(review): the UI wires the "Resultado" textbox to this
+            parameter, not the uploaded file's contents - confirm intended.
+        question: The user's question about that text.
+
+    Returns:
+        The ``content`` attribute of the model response, or an empty string
+        when no question was supplied.
+    """
+    if not question or not question.strip():
+        # Nothing to ask: skip the remote model call.
+        return ""
+    messages = [
+        {"role": "system", "content": "Eres un asistente útil."},
+        # Document and question share a single user turn: Mistral instruct
+        # chat templates require user/assistant roles to alternate, so two
+        # consecutive "user" messages can be rejected.
+        {"role": "user", "content": f"El documento es el siguiente:\n{text}\n\n{question}"},
+    ]
+    response = llm_engine_hf.invoke(messages)
+    return response.content
 def download_text(output_text, filename='output.txt'):
     if output_text:
         file_path = Path(filename)
 # Crear la interfaz de Gradio
 with gr.Blocks() as demo:
+    gr.Markdown("## Procesador de Documentos")
     with gr.Row():
         with gr.Column():
             file = gr.File(label="Subir un archivo")
             action = gr.Radio(label="Seleccione una acción", choices=["Resumen", "Clasificar", "Traducir"])
             target_language = gr.Dropdown(label="Seleccionar idioma de traducción", choices=["en", "fr", "de"], visible=False)
+            summary_length = gr.Radio(label="Seleccione la longitud del resumen", choices=["Corto", "Medio", "Largo"], visible=False)
         with gr.Column():
             output_text = gr.Textbox(label="Resultado", lines=20)
+            question = gr.Textbox(label="Hacer una pregunta al documento", lines=2, visible=False)
+            answer = gr.Textbox(label="Respuesta", lines=2, interactive=False, visible=False)
+    def update_visible_elements(action):
+        """Return visibility updates for the widgets tied to the chosen action.
+
+        The five ``gr.update`` values are positional and must match the
+        ``outputs`` list passed to ``action.change``:
+        target_language, summary_length, question, output_text, answer.
+        """
         if action == "Traducir":
+            # Show only the target-language dropdown.
+            # NOTE(review): output_text (the box process_file writes to) is
+            # hidden here - confirm that is intended.
+            return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+        elif action == "Resumen":
+            # Show only the summary-length selector.
+            # NOTE(review): output_text is hidden here as well - confirm.
+            return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+        elif action == "Clasificar":
+            # Show the question box and the result box.
+            # NOTE(review): no branch ever makes the answer box visible,
+            # although the question button writes to it - confirm.
+            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
         else:
+            # Unknown or cleared selection: hide everything.
+            # Fixed: `visible(False)` called an undefined name (NameError);
+            # it must be the keyword argument `visible=False`.
+            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+    action.change(update_visible_elements, inputs=action, outputs=[target_language, summary_length, question, output_text, answer])
     submit_button = gr.Button("Procesar")
+    submit_button.click(process_file, inputs=[file, action, target_language, summary_length], outputs=output_text)
     def generate_file():
         summary_text = output_text.value
         outputs=gr.File()
     )
+    question_button = gr.Button("Hacer Pregunta")
+    question_button.click(answer_question, inputs=[output_text, question], outputs=answer)
 # Ejecutar la aplicación Gradio
 demo.launch(share=True)