rexoscare committed on
Commit
4f50f56
·
1 Parent(s): 91ecd19

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ from pprint import pprint
4
+ import pandas as pd
5
+ import gradio as gr
6
+ import pdfplumber
7
+ import texthero as hero
8
+ from texthero import preprocessing as ppe
9
+ import re
10
+
11
+
12
# Sentence-embedding checkpoint (multilingual paraphrase model, per its name).
# Loaded once at import time and shared by every similarity request.
MODEL_NAME = 'sentence-transformers/paraphrase-xlm-r-multilingual-v1'
model = SentenceTransformer(MODEL_NAME)
13
+
14
+
15
def remove_special_characters(text):
    """Return *text* with every character outside A-Z / a-z replaced by a space.

    Digits, punctuation, whitespace and non-ASCII letters are each swapped
    for a single space, so the result has the same length as the input.
    """
    return re.sub(r'[^a-zA-Z]', ' ', text)
19
+
20
+
21
# Job description: Word (.docx) document or plain-text file
def opentxt(filepath):
    """Load a job description from *filepath* and return its cleaned text.

    A ``.docx`` file (a zip archive containing ``word/document.xml``) has
    its paragraph text extracted; anything else is read as plain text with
    undecodable bytes ignored.  URLs are stripped, every non-letter is
    replaced by a space, and the texthero pipeline normalises whitespace.

    Args:
        filepath: Path of the uploaded job-description file.

    Returns:
        A pandas Series holding a single cleaned string.
    """
    import xml.etree.ElementTree as ET
    import zipfile

    word_ns = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    raw_text = None

    # Reading a .docx as text would only yield compressed-byte garbage, so
    # pull the text nodes out of the document part instead.
    # NOTE(review): the upload is untrusted XML; consider a hardened parser.
    if zipfile.is_zipfile(filepath):
        with zipfile.ZipFile(filepath) as archive:
            if 'word/document.xml' in archive.namelist():
                root = ET.fromstring(archive.read('word/document.xml'))
                pieces = []
                for node in root.iter():
                    if node.tag in (word_ns + 'p', word_ns + 'tab', word_ns + 'br'):
                        # Paragraph starts, tabs and breaks separate words.
                        pieces.append(' ')
                    elif node.tag == word_ns + 't':
                        pieces.append(node.text or '')
                raw_text = ''.join(pieces)

    if raw_text is None:
        # Plain-text fallback; the context manager closes the handle.
        with open(filepath, errors="ignore") as handle:
            raw_text = handle.read()

    raw_text = raw_text.replace('\n', ' ')
    # Strip URLs before the letter-only filter runs: that filter splits a URL
    # into plain words, which the later URL-removal step presumably cannot
    # recognise any more.
    raw_text = re.sub(r'https?://\S+|www\.\S+', '', raw_text)

    df_1 = pd.DataFrame([raw_text], columns=['text'])
    df_1['text'] = df_1['text'].apply(remove_special_characters)
    custom_pipeline = [ppe.fillna, ppe.remove_urls, ppe.remove_whitespace]
    df_1['cleaned_text'] = hero.clean(df_1['text'], custom_pipeline)
    return df_1['cleaned_text'].astype(str)
33
+
34
+
35
# Resume: PDF file (only the first page is read)
def pdftotext(filepath):
    """Extract and clean the text of the first page of a PDF resume.

    URLs are stripped, every non-letter is replaced by a space, and the
    texthero pipeline normalises whitespace.  A PDF with no pages, or a
    first page with no extractable text, is treated as empty text instead
    of raising.

    Args:
        filepath: Path of the uploaded PDF file.

    Returns:
        A pandas Series holding a single cleaned string.
    """
    with pdfplumber.open(filepath) as pdf:
        pages = pdf.pages
        # Extraction must happen while the PDF is still open.
        page_text = (
            pages[0].extract_text(x_tolerance=3, y_tolerance=3) if pages else None
        )

    # Some pdfplumber versions return None for a page without a text layer
    # (e.g. a scanned image); fall back to an empty string.
    page_text = (page_text or '').replace('\n', ' ')
    # Strip URLs before the letter-only filter runs: that filter splits a URL
    # into plain words, which the later URL-removal step presumably cannot
    # recognise any more.
    page_text = re.sub(r'https?://\S+|www\.\S+', '', page_text)

    df = pd.DataFrame([page_text], columns=['text'])
    df['text'] = df['text'].apply(remove_special_characters)
    custom_pipeline = [ppe.fillna, ppe.remove_urls, ppe.remove_whitespace]
    df['cleaned_text'] = hero.clean(df['text'], custom_pipeline)
    return df['cleaned_text'].astype(str)
48
+
49
+
50
def sent_similarity(filepath_1, filepath_2):
    """Score how closely a resume matches a job description, as a percentage.

    Args:
        filepath_1: Uploaded resume; its ``.name`` attribute is passed to
            ``pdftotext``.
        filepath_2: Uploaded job description; its ``.name`` attribute is
            passed to ``opentxt``.

    Returns:
        The cosine similarity of the two document embeddings, multiplied
        by 100 and rounded to two decimal places.
    """
    resume_text = ''.join(pdftotext(filepath_1.name))
    job_text = ''.join(opentxt(filepath_2.name))

    # Encode both documents in one batch, then split the two embeddings.
    resume_vec, job_vec = model.encode([resume_text, job_text])

    # cosine_similarity works on 2-D inputs, hence the (1, dim) reshapes;
    # the result is a 1x1 matrix.
    score = cosine_similarity(
        resume_vec.reshape(1, -1),
        job_vec.reshape(1, -1),
    )[0][0]
    return round(score * 100, 2)
57
+
58
+
59
# Upload widgets, in the order sent_similarity expects its arguments:
# resume first, job description second.
input_1 = gr.inputs.File(
    file_count="single",
    type="file",
    label='Upload the Resume (.pdf)',
    optional=False,
)
input_2 = gr.inputs.File(
    file_count="single",
    type="file",
    label='Upload the Job Description (.docx)',
    optional=False,
)

# The similarity score is shown through Gradio's "label" output component.
iface = gr.Interface(
    fn=sent_similarity,
    inputs=[input_1, input_2],
    outputs="label",
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
70
+
71
+
72
+
73
+
74
+
75
+