File size: 2,575 Bytes
4f50f56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63da712
4f50f56
638e28f
 
4f50f56
55e2018
4f50f56
55e2018
4f50f56
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import gradio as gr
import pdfplumber
import texthero as hero
from texthero import preprocessing as ppe
import re


# Multilingual sentence-embedding model used to embed both documents;
# loaded once at import time (downloads weights on first run).
model = SentenceTransformer('sentence-transformers/paraphrase-xlm-r-multilingual-v1')


def remove_special_characters(text):
    """Replace every character that is not an ASCII letter with a space."""
    return re.sub(r'[^a-zA-Z]', ' ', text)


#word file (Job Description)
def opentxt(filepath):
    """Read a job-description text file and return its cleaned text.

    Parameters:
        filepath: path to a plain-text (.txt) file.

    Returns:
        pandas Series of dtype str holding the single cleaned document.
    """
    # 'with' guarantees the handle is closed (the original leaked it).
    with open(filepath, errors="ignore") as fh:
        text = fh.read()
    text = text.replace('\n', ' ')
    # Drop URL-like tokens. The original pattern 'www.\S+|www.\S+'
    # duplicated its only alternative and let the unescaped '.' match
    # any character; raw string + escaped dot fixes both.
    text = re.sub(r'www\.\S+', '', text)
    df_1 = pd.DataFrame([text], columns=['text'])
    df_1['text'] = df_1['text'].apply(remove_special_characters)
    custom_pipeline = [ppe.fillna, ppe.remove_urls, ppe.remove_whitespace]
    df_1['cleaned_text'] = hero.clean(df_1['text'], custom_pipeline)
    return df_1['cleaned_text'].astype(str)


#pdf file (Resume)
def pdftotext(filepath):
    """Extract and clean the text of the first page of a resume PDF.

    Parameters:
        filepath: path to a .pdf file; only page 1 is read.

    Returns:
        pandas Series of dtype str holding the single cleaned document.
    """
    with pdfplumber.open(filepath) as pdf:
        first_page = pdf.pages[0]
        # extract_text returns None for image-only pages; fall back to ''
        # instead of crashing on .replace below.
        page_text = first_page.extract_text(x_tolerance=3, y_tolerance=3) or ''
        page_text = page_text.replace('\n', ' ')
        # Drop URL-like tokens. The original pattern 'www.\S+|www.\S+'
        # duplicated its only alternative and let the unescaped '.' match
        # any character; raw string + escaped dot fixes both.
        page_text = re.sub(r'www\.\S+', '', page_text)
        df = pd.DataFrame([page_text], columns=['text'])
        df['text'] = df['text'].apply(remove_special_characters)
        custom_pipeline = [ppe.fillna, ppe.remove_urls, ppe.remove_whitespace]
        df['cleaned_text'] = hero.clean(df['text'], custom_pipeline)
        cleaned = df['cleaned_text'].astype(str)
    return cleaned


def sent_similarity(filepath_1, filepath_2):
    """Return the cosine similarity (as a percentage, 2 decimals) between
    the uploaded resume (PDF) and job description (text file).

    Both arguments are Gradio file objects exposing a ``.name`` path.
    """
    resume_text = pdftotext(filepath_1.name)
    jd_text = opentxt(filepath_2.name)
    # Collapse each cleaned Series into one string and embed both at once.
    documents = [''.join(resume_text), ''.join(jd_text)]
    embeddings = model.encode(documents)
    vec_resume = embeddings[0].reshape(1, -1)
    vec_jd = embeddings[1].reshape(1, -1)
    score = cosine_similarity(vec_resume, vec_jd)[0][0]
    return round(score * 100, 2)


# --- Gradio UI wiring ---
title = "Resume Screener"
description = "Upload your resume(.pdf) and the job description(.txt) and let the sentence similarity model display the similarity percentage !!!"

# One file picker per document being compared.
input_1 = gr.inputs.File(file_count="single", type="file", label='Upload the Resume (.pdf)', optional=False)
input_2 = gr.inputs.File(file_count="single", type="file", label='Upload the Job Description (.txt)', optional=False)

iface = gr.Interface(
    sent_similarity,
    [input_1, input_2],
    "label",
    title=title,
    description=description,
)

if __name__ == "__main__":
    iface.launch()