Update app.py
app.py CHANGED
@@ -4,23 +4,23 @@ warnings.simplefilter(action='ignore', category=FutureWarning)
 import PyPDF2
 import gradio as gr
 from langchain.prompts import PromptTemplate
-from langchain.chains.summarize import load_summarize_chain
-from huggingface_hub import login
 from pathlib import Path
 from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
+from huggingface_hub import login
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import os
 
 login(token=os.getenv('HUGGINGFACE_TOKEN'))
 
-# Configuración del modelo
+# Configuración del modelo de resumen
 llm = HuggingFaceEndpoint(
     repo_id="mistralai/Mistral-7B-Instruct-v0.3",
     task="text-generation",
     max_new_tokens=4096,
     temperature=0.5,
     do_sample=False,
+    model_kwargs={"use_auth_token": HUGGINGFACE_TOKEN}  # Pasar el token como parte de los argumentos del modelo
 )
 llm_engine_hf = ChatHuggingFace(llm=llm)
 
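Note: the added model_kwargs line references HUGGINGFACE_TOKEN as a bare Python name. That name is never defined in the visible code (the token is only ever read via os.getenv('HUGGINGFACE_TOKEN')), so this line would raise a NameError at import time. A minimal sketch of one way to pass the token explicitly; using huggingfacehub_api_token, the endpoint's own token parameter, is a suggestion here, not what the commit does:

import os

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Read the token once from the same environment variable login() already uses.
hf_token = os.getenv("HUGGINGFACE_TOKEN")

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=4096,
    temperature=0.5,
    do_sample=False,
    huggingfacehub_api_token=hf_token,  # pass the token directly instead of via model_kwargs
)
llm_engine_hf = ChatHuggingFace(llm=llm)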
@@ -47,12 +47,12 @@ def summarize(file):
         text = f.read()
 
     template = '''
-
+Please carefully read the following document:
 <document>
 {TEXT}
 </document>
-
-
+After reading through the document, identify the key points and main ideas covered in the text. Organize these key points into a concise bulleted list that summarizes the essential information from the document. The summary should have a maximum of 10 bullet points.
+Your goal is to be comprehensive in capturing the core content of the document, while also being concise in how you express each summary point. Omit minor details and focus on the central themes and important facts.
 '''
 
     prompt = PromptTemplate(
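The PromptTemplate( call is cut off at the hunk boundary, so its arguments are not part of the diff. A sketch of what they presumably look like, given that the template interpolates {TEXT} (an assumption, not the committed code):

# The template above interpolates {TEXT}, so TEXT must be declared as an input variable.
prompt = PromptTemplate(
    template=template,
    input_variables=["TEXT"],
)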
@@ -63,7 +63,7 @@ Su objetivo es ser exhaustivo en la captura del contenido central del documento,
     formatted_prompt = prompt.format(TEXT=text)
     output_summary = llm_engine_hf.invoke(formatted_prompt)
 
-    return
+    return output_summary.content
 
 def classify_text(text):
     inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True, padding="max_length")
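The bare return meant summarize always returned None. The fix works because ChatHuggingFace.invoke returns a chat message object rather than a plain string, and .content holds the generated text. A small illustration of that assumption:

# ChatHuggingFace.invoke returns a message object (an AIMessage), not a str.
msg = llm_engine_hf.invoke("Say hello")
print(type(msg).__name__)  # e.g. AIMessage
print(msg.content)         # the generated text; this is what summarize() now returns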
@@ -85,11 +85,11 @@ def translate(file, target_language):
         text = f.read()
 
     template = '''
-
+Please translate the following document to {LANGUAGE}:
 <document>
 {TEXT}
 </document>
-
+Ensure that the translation is accurate and preserves the original meaning of the document.
 '''
 
     prompt = PromptTemplate(
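This template interpolates both {TEXT} and {LANGUAGE}, so the PromptTemplate arguments (again outside the hunk) presumably declare two input variables. A sketch under that assumption:

# Assumed wiring for the translation prompt: both placeholders become input variables.
prompt = PromptTemplate(
    template=template,
    input_variables=["TEXT", "LANGUAGE"],
)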
@@ -100,7 +100,7 @@ Asegúrese de que la traducción sea precisa y conserve el significado original
     formatted_prompt = prompt.format(TEXT=text, LANGUAGE=target_language)
     translated_text = llm_engine_hf.invoke(formatted_prompt)
 
-    return
+    return translated_text.content
 
 def process_file(file, action, target_language=None):
     if action == "Resumen":
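Only the first line of the process_file body is visible ("Resumen" is the Spanish UI label for the summarize action). A hypothetical sketch of the dispatcher it implies; every branch except the first is an assumption:

def process_file(file, action, target_language=None):
    # Dispatch on the action label chosen in the UI; only "Resumen" appears
    # in the diff, the other labels here are placeholders.
    if action == "Resumen":
        return summarize(file)
    elif action == "Traducir":
        return translate(file, target_language)
    else:
        raise ValueError(f"Unknown action: {action}")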
@@ -133,7 +133,7 @@ def create_download_file(output_text, filename='output.txt'):
 
 # Crear la interfaz de Gradio
 with gr.Blocks() as demo:
-    gr.Markdown("##
+    gr.Markdown("## Document Processor")
 
     with gr.Row():
         with gr.Column():
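For orientation, a self-contained sketch of a gr.Blocks layout in this shape; the actual widgets sit outside the visible hunks, so everything below the title line is a placeholder:

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("## Document Processor")
    with gr.Row():
        with gr.Column():
            # hypothetical inputs; the real components are not shown in the diff
            file_input = gr.File(label="Document")
            action = gr.Radio(["Resumen", "Traducir"], label="Action")
        with gr.Column():
            output = gr.Textbox(label="Result")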
@@ -169,4 +169,4 @@ with gr.Blocks() as demo:
     )
 
 # Ejecutar la aplicación Gradio
-demo.launch(share=True)
+demo.launch(share=True)
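share=True asks Gradio to open a temporary public gradio.live tunnel, which is useful when the script runs locally; on a hosting platform that already exposes the server (such as a Hugging Face Space) it is unnecessary. A plain launch suffices there:

demo.launch()  # the hosting platform serves the app; share=True only matters for local runs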