import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

import os
from pathlib import Path

import PyPDF2
import gradio as gr
import torch
from huggingface_hub import login
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Authenticate against the Hugging Face Hub with a token read from the environment.
login(token=os.getenv('HUGGINGFACE_TOKEN'))


# Remote text-generation endpoint (Mistral-7B-Instruct) wrapped as a LangChain chat model.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=4096,
    temperature=0.5,
    do_sample=False,  # greedy decoding
)
llm_engine_hf = ChatHuggingFace(llm=llm)


# Longformer fine-tuned on Spanish legal text, used to classify the uploaded document.
tokenizer = AutoTokenizer.from_pretrained("mrm8488/legal-longformer-base-8192-spanish")
model = AutoModelForSequenceClassification.from_pretrained("mrm8488/legal-longformer-base-8192-spanish")

# Mapping from the classifier's output indices to document categories.
id2label = {0: "multas", 1: "politicas_de_privacidad", 2: "contratos", 3: "denuncias", 4: "otros"}


def read_pdf(file_path):
    """Extract and concatenate the text of every page of a PDF."""
    pdf_reader = PyPDF2.PdfReader(file_path)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text()
    return text


def summarize(file):
    """Summarize the uploaded document as a concise bullet list (at most 10 points)."""
    file_path = file.name
    if file_path.endswith('.pdf'):
        text = read_pdf(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()

    template = '''
Por favor, lea detenidamente el siguiente documento:
<document>
{TEXT}
</document>
Después de leer el documento, identifique los puntos clave y las ideas principales cubiertas en el texto. Organice estos puntos clave en una lista con viñetas concisa que resuma la información esencial del documento. El resumen debe tener un máximo de 10 puntos.
Su objetivo es ser exhaustivo en la captura del contenido central del documento, mientras que también es conciso en la expresión de cada punto del resumen. Omita los detalles menores y concéntrese en los temas centrales y hechos importantes.
'''

    prompt = PromptTemplate(
        template=template,
        input_variables=['TEXT']
    )

    formatted_prompt = prompt.format(TEXT=text)
    output_summary = llm_engine_hf.invoke(formatted_prompt)

    return f"Prompt:\n{formatted_prompt}\n\nResumen:\n{output_summary.content}"

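
# Illustrative only (not part of the Gradio flow): summarize() and translate() just need
# an object exposing a .name attribute, which is what Gradio's file component passes in.
# A quick local test could therefore look like the sketch below; "contrato.pdf" is a
# placeholder path.
#
#   from types import SimpleNamespace
#   print(summarize(SimpleNamespace(name="contrato.pdf")))
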

def classify_text(text):
    """Classify a document into one of the categories defined in id2label."""
    inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True, padding="max_length")
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    predicted_class_id = logits.argmax(dim=-1).item()
    predicted_label = id2label[predicted_class_id]
    return predicted_label

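
# Hedged sketch, not used by the app: if per-class confidences are ever useful, the same
# forward pass can be mapped to a probability per label with a softmax. The helper name
# classify_text_with_scores is illustrative only.
def classify_text_with_scores(text):
    inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True, padding="max_length")
    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits
    # Turn the logits into probabilities and pair each one with its label.
    probs = torch.softmax(logits, dim=-1).squeeze(0)
    return {id2label[i]: round(p.item(), 4) for i, p in enumerate(probs)}
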

def translate(file, target_language):
    """Translate the uploaded document into the selected target language."""
    file_path = file.name
    if file_path.endswith('.pdf'):
        text = read_pdf(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()

    template = '''
Por favor, traduzca el siguiente documento al {LANGUAGE}:
<document>
{TEXT}
</document>
Asegúrese de que la traducción sea precisa y conserve el significado original del documento.
'''

    prompt = PromptTemplate(
        template=template,
        input_variables=['TEXT', 'LANGUAGE']
    )

    formatted_prompt = prompt.format(TEXT=text, LANGUAGE=target_language)
    translated_text = llm_engine_hf.invoke(formatted_prompt)

    return f"Prompt:\n{formatted_prompt}\n\nTraducción:\n{translated_text.content}"


def process_file(file, action, target_language=None):
    """Dispatch the uploaded file to the action selected in the UI."""
    if action == "Resumen":
        return summarize(file)
    elif action == "Clasificar":
        file_path = file.name
        if file_path.endswith('.pdf'):
            text = read_pdf(file_path)
        else:
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()
        return classify_text(text)
    elif action == "Traducir":
        return translate(file, target_language)
    else:
        return "Acción no válida"


# Gradio interface: upload a document, choose an action, and display the result.
with gr.Blocks() as demo:
    gr.Markdown("## Procesador de Documentos")

    with gr.Row():
        with gr.Column():
            file = gr.File(label="Subir un archivo")
            action = gr.Radio(label="Seleccione una acción", choices=["Resumen", "Clasificar", "Traducir"])
            target_language = gr.Dropdown(label="Seleccionar idioma de traducción", choices=["en", "fr", "de"], visible=False)

        with gr.Column():
            output_text = gr.Textbox(label="Resultado", lines=20)

    def update_language_dropdown(action):
        # Only show the language selector when the user chooses to translate.
        if action == "Traducir":
            return gr.update(visible=True)
        else:
            return gr.update(visible=False)

    action.change(update_language_dropdown, inputs=action, outputs=target_language)

    submit_button = gr.Button("Procesar")
    submit_button.click(process_file, inputs=[file, action, target_language], outputs=output_text)


demo.launch(share=True)
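
# A minimal way to launch the app locally (the filename app.py and the token value are
# placeholders for whatever you actually use):
#
#   export HUGGINGFACE_TOKEN=hf_xxx
#   python app.py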