"""Gradio app that summarizes a CV (PDF) via OCR, span classification, and text generation."""

import os
import tempfile

import gradio as gr

from ocr_utils import extract_pdf_text
from span_classifier import load_model, predict_spans, format_results, format_final_output
from text_generator import generate_text

# Load the models once at import time so every request reuses them.
sbert_model, classifier_head, tokenizer, device = load_model(
    model_path='rfahlevih/sentence-transformer-all-mpnetv2-resume-span-classifier',
    head_path='./classification_head/SBERT-finetuned-span-classifier-1_classification_head.pt'
)


def full_pipeline(pdf_file):
    """Run the full CV summarization pipeline on an uploaded PDF.

    Args:
        pdf_file: The value Gradio passes from the ``gr.File`` input; it is
            forwarded unchanged to ``extract_pdf_text``.

    Returns:
        A ``(text, path)`` tuple. On success ``text`` is the generated
        summary and ``path`` points at a UTF-8 ``result_summary.txt`` file
        containing it. If OCR yields nothing, ``text`` is an error message
        and ``path`` is ``None``.
    """
    ocr_text = extract_pdf_text(pdf_file)
    if not ocr_text:
        return "Oops! We cannot do OCR because the PDF file has not been provided or there is an error.", None

    spans = predict_spans(
        full_text=ocr_text,
        model=sbert_model,
        classification_head=classifier_head,
        tokenizer=tokenizer,
        device=device
    )
    formatted = format_results(spans)
    final_span_output = format_final_output(formatted)
    generated = generate_text(final_span_output)

    # Save to a temporary file. Each request gets its own freshly created
    # private directory so that concurrent requests (or another process
    # sharing the system temp dir) cannot overwrite one another's summary,
    # and the path is not predictable. The download keeps its friendly name.
    # NOTE: the directory is not removed here because Gradio still needs to
    # read the file after this function returns.
    custom_filename = "result_summary.txt"
    temp_dir = tempfile.mkdtemp(prefix="cv_summary_")
    custom_path = os.path.join(temp_dir, custom_filename)
    with open(custom_path, "w", encoding="utf-8") as f:
        f.write(generated)

    return generated, custom_path


# Gradio UI
demo = gr.Interface(
    fn=full_pipeline,
    inputs=gr.File(label="Drop your CV here (.pdf)", file_types=[".pdf"]),
    outputs=[
        gr.Textbox(label="Summary Results"),
        gr.File(label="Download Summary Results (.txt)")
    ],
    title="Curriculum Vitae Summarization using SBERT and T5",
    description=(
        "This Curriculum Vitae summarization system was developed as part of "
        "my final project research, which focuses on problems in applicant "
        "tracking systems (ATS). To solve these problems, this system utilizes "
        "SBERT to extract important information from CVs, and uses the T5 "
        "model to generate text summaries based on previously extracted points."
    ),
    flagging_mode="never"
)

demo.launch()