Update app.py
app.py CHANGED
@@ -4,23 +4,23 @@ warnings.simplefilter(action='ignore', category=FutureWarning)
 import PyPDF2
 import gradio as gr
 from langchain.prompts import PromptTemplate
-from langchain.chains.summarize import load_summarize_chain
-from huggingface_hub import login
 from pathlib import Path
 from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
+from huggingface_hub import login
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import os
 
 login(token=os.getenv('HUGGINGFACE_TOKEN'))
 
-# Configuración del modelo
+# Configuración del modelo de resumen
 llm = HuggingFaceEndpoint(
     repo_id="mistralai/Mistral-7B-Instruct-v0.3",
     task="text-generation",
     max_new_tokens=4096,
     temperature=0.5,
     do_sample=False,
+    model_kwargs={"use_auth_token": HUGGINGFACE_TOKEN}  # Pasar el token como parte de los argumentos del modelo
 )
 llm_engine_hf = ChatHuggingFace(llm=llm)
 
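Note: the added model_kwargs line references HUGGINGFACE_TOKEN as a bare Python name. That name is never defined in the visible code (the token is only ever read via os.getenv('HUGGINGFACE_TOKEN')), so this line would raise a NameError at import time. A minimal sketch of one way to pass the token explicitly; using huggingfacehub_api_token, the endpoint's own token parameter, is a suggestion here, not what the commit does:

import os

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Read the token once from the same environment variable login() already uses.
hf_token = os.getenv("HUGGINGFACE_TOKEN")

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=4096,
    temperature=0.5,
    do_sample=False,
    huggingfacehub_api_token=hf_token,  # pass the token directly instead of via model_kwargs
)
llm_engine_hf = ChatHuggingFace(llm=llm)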
@@ -47,12 +47,12 @@ def summarize(file):
         text = f.read()
 
     template = '''
-
+Please carefully read the following document:
 <document>
 {TEXT}
 </document>
-
-
+After reading through the document, identify the key points and main ideas covered in the text. Organize these key points into a concise bulleted list that summarizes the essential information from the document. The summary should have a maximum of 10 bullet points.
+Your goal is to be comprehensive in capturing the core content of the document, while also being concise in how you express each summary point. Omit minor details and focus on the central themes and important facts.
 '''
 
     prompt = PromptTemplate(
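The PromptTemplate( call is cut off at the hunk boundary, so its arguments are not part of the diff. A sketch of what they presumably look like, given that the template interpolates {TEXT} (an assumption, not the committed code):

# The template above interpolates {TEXT}, so TEXT must be declared as an input variable.
prompt = PromptTemplate(
    template=template,
    input_variables=["TEXT"],
)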
@@ -63,7 +63,7 @@ Su objetivo es ser exhaustivo en la captura del contenido central del documento,
     formatted_prompt = prompt.format(TEXT=text)
     output_summary = llm_engine_hf.invoke(formatted_prompt)
 
-    return
+    return output_summary.content
 
 def classify_text(text):
     inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True, padding="max_length")
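The bare return meant summarize always returned None. The fix works because ChatHuggingFace.invoke returns a chat message object rather than a plain string, and .content holds the generated text. A small illustration of that assumption:

# ChatHuggingFace.invoke returns a message object (an AIMessage), not a str.
msg = llm_engine_hf.invoke("Say hello")
print(type(msg).__name__)  # e.g. AIMessage
print(msg.content)         # the generated text; this is what summarize() now returns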
@@ -85,11 +85,11 @@ def translate(file, target_language):
         text = f.read()
 
     template = '''
-
+Please translate the following document to {LANGUAGE}:
 <document>
 {TEXT}
 </document>
-
+Ensure that the translation is accurate and preserves the original meaning of the document.
 '''
 
     prompt = PromptTemplate(
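This template interpolates both {TEXT} and {LANGUAGE}, so the PromptTemplate arguments (again outside the hunk) presumably declare two input variables. A sketch under that assumption:

# Assumed wiring for the translation prompt: both placeholders become input variables.
prompt = PromptTemplate(
    template=template,
    input_variables=["TEXT", "LANGUAGE"],
)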
@@ -100,7 +100,7 @@ Asegúrese de que la traducción sea precisa y conserve el significado original
     formatted_prompt = prompt.format(TEXT=text, LANGUAGE=target_language)
     translated_text = llm_engine_hf.invoke(formatted_prompt)
 
-    return
+    return translated_text.content
 
 def process_file(file, action, target_language=None):
     if action == "Resumen":
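Only the first line of the process_file body is visible ("Resumen" is the Spanish UI label for the summarize action). A hypothetical sketch of the dispatcher it implies; every branch except the first is an assumption:

def process_file(file, action, target_language=None):
    # Dispatch on the action label chosen in the UI; only "Resumen" appears
    # in the diff, the other labels here are placeholders.
    if action == "Resumen":
        return summarize(file)
    elif action == "Traducir":
        return translate(file, target_language)
    else:
        raise ValueError(f"Unknown action: {action}")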
@@ -133,7 +133,7 @@ def create_download_file(output_text, filename='output.txt'):
 
 # Crear la interfaz de Gradio
 with gr.Blocks() as demo:
-    gr.Markdown("##
+    gr.Markdown("## Document Processor")
 
     with gr.Row():
         with gr.Column():
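For orientation, a self-contained sketch of a gr.Blocks layout in this shape; the actual widgets sit outside the visible hunks, so everything below the title line is a placeholder:

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("## Document Processor")
    with gr.Row():
        with gr.Column():
            # hypothetical inputs; the real components are not shown in the diff
            file_input = gr.File(label="Document")
            action = gr.Radio(["Resumen", "Traducir"], label="Action")
        with gr.Column():
            output = gr.Textbox(label="Result")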
@@ -169,4 +169,4 @@ with gr.Blocks() as demo:
     )
 
 # Ejecutar la aplicación Gradio
-demo.launch(share=True)
+demo.launch(share=True)
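share=True asks Gradio to open a temporary public gradio.live tunnel, which is useful when the script runs locally; on a hosting platform that already exposes the server (such as a Hugging Face Space) it is unnecessary. A plain launch suffices there:

demo.launch()  # the hosting platform serves the app; share=True only matters for local runs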