|
import gradio as gr |
|
import spacy |
|
import pandas as pd |
|
from docx import Document |
|
from io import BytesIO |
|
|
|
|
|
# Name of the spaCy pipeline used for entity recognition (Chinese, transformer-based).
_SPACY_MODEL_NAME = "zh_core_web_trf"

# Loaded once at import time; the extraction function below reuses this instance.
nlp = spacy.load(_SPACY_MODEL_NAME)
|
|
|
|
|
def extract_names_from_docx(docx_file):
    """Extract person names from a .docx file and save them to an Excel workbook.

    The text of every paragraph in the document is joined with single spaces,
    run through the module-level spaCy pipeline ``nlp``, and the entities
    tagged as persons are collected (duplicates removed, first-seen order
    kept) into a one-column sheet named ``Nombres``.

    Args:
        docx_file: The uploaded document. May be a filesystem path, a binary
            file-like object, or a temp-file wrapper exposing the path via
            ``.name`` (which form Gradio passes depends on its version).

    Returns:
        A ``(path, filename)`` tuple: the path of the generated ``.xlsx``
        file on disk, and the display name ``"nombres_personas.xlsx"``.
    """
    import os
    import tempfile

    # Prefer the on-disk path when the upload object carries one; otherwise
    # hand the object to python-docx unchanged (it accepts paths and streams).
    upload_path = getattr(docx_file, "name", None)
    docx_source = upload_path if isinstance(upload_path, str) else docx_file

    document = Document(docx_source)
    text = ' '.join(para.text for para in document.paragraphs)

    doc = nlp(text)

    # OntoNotes-style pipelines (such as the Chinese one loaded by this file)
    # tag people as "PERSON"; other spaCy pipelines use "PER". Accept both so
    # the filter does not silently match nothing.
    person_labels = {'PER', 'PERSON'}

    # dict.fromkeys removes duplicates while keeping first-appearance order,
    # so the spreadsheet rows are deterministic from run to run.
    persons = list(dict.fromkeys(
        ent.text for ent in doc.ents if ent.label_ in person_labels
    ))

    df = pd.DataFrame(persons, columns=['Nombres'])

    # A Gradio File output needs a path on disk rather than an in-memory
    # buffer. A fresh temp directory lets the file keep its friendly name.
    filename = "nombres_personas.xlsx"
    output_path = os.path.join(tempfile.mkdtemp(), filename)
    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        df.to_excel(writer, index=False)

    return output_path, filename
|
|
|
|
|
# --- Web UI -----------------------------------------------------------------
# One .docx upload in; a downloadable file plus a text field out, matching the
# two values returned by extract_names_from_docx.
_docx_upload = gr.File(file_types=[".docx"])
_result_components = [gr.File(), "text"]

_APP_TITLE = "Extractor de Nombres"
_APP_DESCRIPTION = (
    "Sube un archivo .docx y extrae los nombres de las personas usando NLP con SpaCy. "
    "Descarga el resultado en un archivo Excel."
)

iface = gr.Interface(
    fn=extract_names_from_docx,
    inputs=_docx_upload,
    outputs=_result_components,
    title=_APP_TITLE,
    description=_APP_DESCRIPTION,
)

# Start the Gradio server as soon as the script is executed.
iface.launch()