Spaces:
Sleeping
Sleeping
File size: 2,039 Bytes
5581268 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import gradio as gr
import os
import tempfile
from ocr_utils import extract_pdf_text
from span_classifier import load_model, predict_spans, format_results, format_final_output
from text_generator import generate_text
# Load everything the span classifier needs once, at import time, so that each
# call to full_pipeline reuses the same objects instead of reloading them.
_SPAN_MODEL_PATH = "rfahlevih/sentence-transformer-all-mpnetv2-resume-span-classifier"
_SPAN_HEAD_PATH = "./classification_head/SBERT-finetuned-span-classifier-1_classification_head.pt"

# load_model returns four values; they are bound to the module-level names that
# full_pipeline passes on to predict_spans.
sbert_model, classifier_head, tokenizer, device = load_model(
    model_path=_SPAN_MODEL_PATH,
    head_path=_SPAN_HEAD_PATH,
)
def full_pipeline(pdf_file):
    """Run OCR, span classification and text generation on an uploaded CV.

    Args:
        pdf_file: The uploaded PDF as delivered by the Gradio ``File`` input,
            or ``None`` when nothing was uploaded. It is forwarded unchanged
            to ``extract_pdf_text``.

    Returns:
        A ``(summary, path)`` tuple. On success ``summary`` is the value
        returned by ``generate_text`` and ``path`` is a UTF-8 text file named
        ``result_summary.txt`` that contains it. When no file was provided or
        OCR produced nothing, the tuple is an error message and ``None``.
    """
    failure = (
        "Oops! We cannot do OCR because the PDF file has not been provided or there is an error.",
        None,
    )

    # Submitting the form without a file hands us None; stop before OCR
    # instead of relying on extract_pdf_text to cope with it.
    if pdf_file is None:
        return failure

    ocr_text = extract_pdf_text(pdf_file)
    if not ocr_text:
        return failure

    spans = predict_spans(
        full_text=ocr_text,
        model=sbert_model,
        classification_head=classifier_head,
        tokenizer=tokenizer,
        device=device,
    )
    formatted = format_results(spans)
    final_span_output = format_final_output(formatted)
    generated = generate_text(final_span_output)

    # Save to a temporary file. Each call gets its own directory: a single
    # fixed path in the shared temp dir would let concurrent requests overwrite
    # (and download) each other's summary. The file name itself is kept so the
    # download is still called result_summary.txt.
    # NOTE(review): the directory is not removed here because the caller still
    # needs the file after this function returns.
    output_dir = tempfile.mkdtemp(prefix="cv_summary_")
    custom_path = os.path.join(output_dir, "result_summary.txt")
    with open(custom_path, "w", encoding="utf-8") as f:
        f.write(generated)

    return generated, custom_path
# Gradio UI: one PDF upload in, the summary text plus a downloadable .txt out.
cv_upload = gr.File(label="Drop your CV here (.pdf)", file_types=[".pdf"])
summary_box = gr.Textbox(label="Summary Results")
summary_download = gr.File(label="Download Summary Results (.txt)")

demo = gr.Interface(
    fn=full_pipeline,
    inputs=cv_upload,
    outputs=[summary_box, summary_download],
    title="Curriculum Vitae Summarization using SBERT and T5",
    description="This Curriculum Vitae summarization system was developed as part of my final project research, which focuses on problems in applicant tracking systems (ATS). To solve these problems, this system utilizes SBERT to extract important information from CVs, and uses the T5 model to generate text summaries based on previously extracted points.",
    flagging_mode="never",
)

# Start the web server as soon as the script is executed.
demo.launch()
|