Spaces:

rexoscare
/

Resume_screener

Build error

App Files Files Community

rexoscare committed on Jan 9, 2022

Commit

4f50f56

1 Parent(s): 91ecd19

Create app.py

Browse files

Files changed (1) hide show

app.py +75 -0

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

import html
import re
import zipfile
from pprint import pprint

import gradio as gr
import pandas as pd
import pdfplumber
import texthero as hero
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from texthero import preprocessing as ppe
# Sentence-embedding model, instantiated once when the module is imported so
# that every call to the scoring function reuses the same instance.
model = SentenceTransformer('sentence-transformers/paraphrase-xlm-r-multilingual-v1')
# Matches any single character that is not an ASCII letter; compiled once.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z]')


def remove_special_characters(text):
    """Replace every character that is not an ASCII letter with a space.

    Digits, punctuation, whitespace and non-ASCII letters are each replaced
    by a single space, so the result has the same length as the input.

    Args:
        text: The string to sanitise.

    Returns:
        A string containing only ``a-z``, ``A-Z`` and spaces.
    """
    # NOTE(review): accented / non-Latin letters are replaced as well, even
    # though the embedding model loaded in this file is a multilingual one --
    # confirm that ASCII-only text is really what is wanted here.
    return _NON_LETTER_RE.sub(' ', text)
# Job description file (plain text or Word .docx)
def _docx_text(filepath):
    """Return the text of a Word .docx file, or None if *filepath* is not one.

    A .docx file is a zip archive whose main body lives in
    ``word/document.xml``. The text runs (``<w:t>`` elements) are pulled out
    with a regular expression rather than an XML parser, so entity
    definitions in an uploaded document are never expanded.
    """
    if not zipfile.is_zipfile(filepath):
        return None
    with zipfile.ZipFile(filepath) as archive:
        if 'word/document.xml' not in archive.namelist():
            return None
        xml = archive.read('word/document.xml').decode('utf-8', errors='ignore')

    paragraphs = []
    for chunk in xml.split('</w:p>'):
        # ``<w:t>`` or ``<w:t attr=...>`` only; must not match <w:tab/>, <w:tbl>, ...
        runs = re.findall(r'<w:t(?:\s[^>]*)?>(.*?)</w:t>', chunk, flags=re.DOTALL)
        if runs:
            # Runs inside one paragraph are contiguous text: join without a gap.
            paragraphs.append(html.unescape(''.join(runs)))
    return ' '.join(paragraphs)


def opentxt(filepath):
    """Read a job description and return its cleaned text.

    Word ``.docx`` files are unpacked and their body text extracted; any
    other file is read as text (UTF-8, undecodable bytes ignored).

    Args:
        filepath: Path of the job-description file.

    Returns:
        A one-element pandas Series of ``str`` holding the cleaned text.
    """
    raw = _docx_text(filepath)
    if raw is None:
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(filepath, encoding='utf-8', errors='ignore') as handle:
            raw = handle.read()

    raw = raw.replace('\n', ' ')
    # Strip links *before* punctuation is removed below; once "://" and "."
    # have become spaces a URL can no longer be recognised.
    raw = re.sub(r'https?://\S+|www\.\S+', '', raw)

    frame = pd.DataFrame([raw], columns=['text'])
    frame['text'] = frame['text'].apply(remove_special_characters)
    custom_pipeline = [ppe.fillna, ppe.remove_urls, ppe.remove_whitespace]
    frame['cleaned_text'] = hero.clean(frame['text'], custom_pipeline)
    return frame['cleaned_text'].astype(str)
# PDF file (resume)
def pdftotext(filepath, max_pages=1):
    """Extract and clean the text of a PDF resume.

    Args:
        filepath: Path of the PDF file.
        max_pages: Number of leading pages to read. Defaults to 1 (only the
            first page); pass ``None`` to read every page.

    Returns:
        A one-element pandas Series of ``str`` holding the cleaned text. The
        text is empty when the PDF has no pages or no extractable text.
    """
    with pdfplumber.open(filepath) as pdf:
        pages = pdf.pages if max_pages is None else pdf.pages[:max_pages]
        # extract_text() may yield None for a page without a text layer
        # (e.g. a scanned image); treat that as empty text instead of crashing.
        page_texts = [
            page.extract_text(x_tolerance=3, y_tolerance=3) or ''
            for page in pages
        ]

    raw = ' '.join(page_texts).replace('\n', ' ')
    # Strip links *before* punctuation is removed below; once "://" and "."
    # have become spaces a URL can no longer be recognised.
    raw = re.sub(r'https?://\S+|www\.\S+', '', raw)

    frame = pd.DataFrame([raw], columns=['text'])
    frame['text'] = frame['text'].apply(remove_special_characters)
    custom_pipeline = [ppe.fillna, ppe.remove_urls, ppe.remove_whitespace]
    frame['cleaned_text'] = hero.clean(frame['text'], custom_pipeline)
    return frame['cleaned_text'].astype(str)
def sent_similarity(filepath_1, filepath_2):
    """Score how similar a resume is to a job description, as a percentage.

    Args:
        filepath_1: Uploaded resume; its ``name`` attribute is passed to
            ``pdftotext``.
        filepath_2: Uploaded job description; its ``name`` attribute is
            passed to ``opentxt``.

    Returns:
        The cosine similarity of the two text embeddings multiplied by 100
        and rounded to two decimals, as a built-in ``float``.
    """
    resume_text = ''.join(pdftotext(filepath_1.name))
    job_text = ''.join(opentxt(filepath_2.name))

    embeddings = model.encode([resume_text, job_text])
    similarity = cosine_similarity(
        embeddings[0].reshape(1, -1),
        embeddings[1].reshape(1, -1),
    )[0][0]
    # Convert the NumPy scalar to a plain float before rounding so the caller
    # receives a built-in number rounded at double precision.
    return round(float(similarity) * 100, 2)
# Gradio UI: two single-file uploads are passed to sent_similarity and the
# returned score is rendered with the "label" output component.
input_1 = gr.inputs.File(
    file_count="single",
    type="file",
    label='Upload the Resume (.pdf)',
    optional=False,
)
input_2 = gr.inputs.File(
    file_count="single",
    type="file",
    label='Upload the Job Description (.docx)',
    optional=False,
)
iface = gr.Interface(sent_similarity, [input_1, input_2], "label")

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()