Update functions.py
Browse files- functions.py +123 -122
functions.py
CHANGED
|
@@ -1,123 +1,124 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import tempfile
|
| 3 |
-
import fitz
|
| 4 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
-
import numpy as np
|
| 6 |
-
import pandas as pd
|
| 7 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 8 |
-
from dotenv import load_dotenv
|
| 9 |
-
import google.generativeai as genai
|
| 10 |
-
|
| 11 |
-
load_dotenv() ## Load all the environment variables
|
| 12 |
-
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
| 13 |
-
|
| 14 |
-
def extract_text_from_pdf(pdf_content):
|
| 15 |
-
"""
|
| 16 |
-
Extracts text content from a PDF file.
|
| 17 |
-
Parameters:
|
| 18 |
-
- pdf_content (bytes): Bytes-like object containing the content of the PDF file.
|
| 19 |
-
Returns:
|
| 20 |
-
- str: Extracted text content from the PDF file.
|
| 21 |
-
"""
|
| 22 |
-
text = ''
|
| 23 |
-
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
|
| 24 |
-
temp_file.write(pdf_content)
|
| 25 |
-
temp_path = temp_file.name
|
| 26 |
-
|
| 27 |
-
pdf_document = fitz.open(temp_path)
|
| 28 |
-
for page_number in range(pdf_document.page_count):
|
| 29 |
-
page = pdf_document[page_number]
|
| 30 |
-
text += page.get_text()
|
| 31 |
-
|
| 32 |
-
pdf_document.close() # Close the PDF document explicitly
|
| 33 |
-
os.remove(temp_path) # Remove the temporary file after use
|
| 34 |
-
return str(text.replace("\xa0", ""))
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
def generate_gemini_content(transcript_text):
|
| 38 |
-
"""
|
| 39 |
-
Generates a summary based on the input text using Google's Gemini Pro model.
|
| 40 |
-
Parameters:
|
| 41 |
-
- transcript_text (str): Text to be summarized.
|
| 42 |
-
Returns:
|
| 43 |
-
- str: Generated summary.
|
| 44 |
-
"""
|
| 45 |
-
prompt = """
|
| 46 |
-
Instructions:
|
| 47 |
-
Please provide a concise summary of your relevant experience, skills,
|
| 48 |
-
and qualifications in the field of programming and technology.
|
| 49 |
-
Highlight your practical experience, technological proficiencies, technical skills, soft skills,
|
| 50 |
-
proficiency in programming languages and frameworks, as well as any other skills relevant to programming fields.
|
| 51 |
-
Additionally, include your location of residence and any other relevant details related to the programming industry
|
| 52 |
-
to facilitate accurate matching with job descriptions.
|
| 53 |
-
Example summary:
|
| 54 |
-
"Experienced software engineer with proficiency in Python, JavaScript, and Java.
|
| 55 |
-
Skilled in developing web applications using React.js and Django frameworks.
|
| 56 |
-
Strong problem-solving and communication skills. Located in New York City,
|
| 57 |
-
seeking opportunities in full-stack development to leverage my skills and contribute to innovative projects."
|
| 58 |
-
CV is :
|
| 59 |
-
"""
|
| 60 |
-
model = genai.GenerativeModel("gemini-pro")
|
| 61 |
-
response = model.generate_content(prompt + transcript_text)
|
| 62 |
-
return response.text
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
def git_indices(data, cv_vect, df_vect):
|
| 66 |
-
"""
|
| 67 |
-
Computes cosine similarity between the vector representation of the input data and the vector representations of job descriptions.
|
| 68 |
-
Parameters:
|
| 69 |
-
- data (str): Input data.
|
| 70 |
-
- cv_vect (numpy.ndarray): Vector representation of the input data.
|
| 71 |
-
- df_vect (scipy.sparse.csr_matrix): Vector representations of job descriptions.
|
| 72 |
-
Returns:
|
| 73 |
-
- numpy.ndarray: Indices of job descriptions sorted in descending order of similarity.
|
| 74 |
-
"""
|
| 75 |
-
for i in range(0, len([data])):
|
| 76 |
-
distances = cosine_similarity(cv_vect[i], df_vect).flatten()
|
| 77 |
-
indices = np.argsort(distances)[::-1]
|
| 78 |
-
return indices
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
def fit_data(csv_path: str):
|
| 82 |
-
"""
|
| 83 |
-
Reads and preprocesses job description data from a CSV file and creates TF-IDF vectors.
|
| 84 |
-
Parameters:
|
| 85 |
-
- csv_path (str): Path to the CSV file containing job descriptions.
|
| 86 |
-
Returns:
|
| 87 |
-
- pandas.DataFrame: DataFrame containing job descriptions.
|
| 88 |
-
- sklearn.feature_extraction.text.TfidfVectorizer: TF-IDF vectorizer object.
|
| 89 |
-
- scipy.sparse.csr_matrix: TF-IDF vectors of job descriptions.
|
| 90 |
-
"""
|
| 91 |
-
df = pd.read_csv(csv_path)
|
| 92 |
-
x = df["concatenated_column"]
|
| 93 |
-
y = df["label"]
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
|
|
|
| 123 |
return prediction_data
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import tempfile
|
| 3 |
+
import fitz
|
| 4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
import google.generativeai as genai
|
| 10 |
+
|
# Module import side effects: read the local .env file so GOOGLE_API_KEY is
# available, then configure the Gemini client with it.
# NOTE(review): if GOOGLE_API_KEY is unset, os.getenv returns None and the
# failure surfaces later at the first API call — confirm this is acceptable.
load_dotenv() ## Load all the environment variables
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
def extract_text_from_pdf(pdf_content):
    """
    Extracts text content from a PDF file.

    Parameters:
    - pdf_content (bytes): Bytes-like object containing the content of the PDF file.

    Returns:
    - str: Extracted text content from the PDF file, with non-breaking
      spaces ("\xa0") removed.
    """
    text = ''
    # fitz.open() is given a path here, so spill the bytes to a temporary file.
    # delete=False because the file is reopened by name after this handle
    # closes (required on Windows, where an open temp file cannot be reopened).
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(pdf_content)
        temp_path = temp_file.name

    # try/finally so the temp file is removed even when fitz fails to parse
    # the PDF — the original leaked the file on any exception.
    try:
        pdf_document = fitz.open(temp_path)
        try:
            for page_number in range(pdf_document.page_count):
                page = pdf_document[page_number]
                text += page.get_text()
        finally:
            pdf_document.close()  # Close the PDF document explicitly
    finally:
        os.remove(temp_path)  # Remove the temporary file after use
    return text.replace("\xa0", "")
| 35 |
+
|
| 36 |
+
|
def generate_gemini_content(transcript_text):
    """
    Generates a summary based on the input text using Google's Gemini Pro model.

    Parameters:
    - transcript_text (str): Text to be summarized (here, the extracted CV text).

    Returns:
    - str: Generated summary.

    Raises:
    - google.api_core exceptions / ValueError from the genai client on API
      failure or blocked content (response.text raises when no candidate
      text exists) — callers should be prepared for that.
    """
    # NOTE(review): the prompt is phrased as instructions to the CV's author,
    # but it is sent to the model with the CV appended after "CV is :" — the
    # model treats it as a summarization instruction. Confirm wording is
    # intentional before editing.
    prompt = """
    Instructions:
    Please provide a concise summary of your relevant experience, skills,
    and qualifications in the field of programming and technology.
    Highlight your practical experience, technological proficiencies, technical skills, soft skills,
    proficiency in programming languages and frameworks, as well as any other skills relevant to programming fields.
    Additionally, include your location of residence and any other relevant details related to the programming industry
    to facilitate accurate matching with job descriptions.
    Example summary:
    "Experienced software engineer with proficiency in Python, JavaScript, and Java.
    Skilled in developing web applications using React.js and Django frameworks.
    Strong problem-solving and communication skills. Located in New York City,
    seeking opportunities in full-stack development to leverage my skills and contribute to innovative projects."
    CV is :
    """
    model = genai.GenerativeModel("gemini-pro")
    response = model.generate_content(prompt + transcript_text)
    return response.text
| 63 |
+
|
| 64 |
+
|
def git_indices(data, cv_vect, df_vect):
    """
    Computes cosine similarity between the vector representation of the input
    data and the vector representations of job descriptions.

    Parameters:
    - data (str): Input data (unused; retained for backward compatibility
      with existing keyword-argument callers).
    - cv_vect: Vector representation of the input data (single-row matrix,
      e.g. from TfidfVectorizer.transform on a one-element list).
    - df_vect (scipy.sparse.csr_matrix): Vector representations of job descriptions.

    Returns:
    - numpy.ndarray: Indices of job descriptions sorted in descending order
      of similarity.
    """
    # The original wrapped this in `for i in range(0, len([data]))`, which
    # always executes exactly once (len([data]) == 1) — compute the single
    # similarity row directly. cv_vect[0] mirrors the original cv_vect[i].
    distances = cosine_similarity(cv_vect[0], df_vect).flatten()
    # argsort is ascending; reverse for most-similar-first ordering.
    return np.argsort(distances)[::-1]
| 79 |
+
|
| 80 |
+
|
def fit_data(csv_path: str):
    """
    Reads and preprocesses job description data from a CSV file and creates
    TF-IDF vectors.

    Parameters:
    - csv_path (str): Path to the CSV file containing job descriptions. Must
      contain a "concatenated_column" text column.

    Returns:
    - pandas.DataFrame: DataFrame containing job descriptions (with the raw
      "concatenated_column" text column dropped).
    - sklearn.feature_extraction.text.TfidfVectorizer: Fitted TF-IDF vectorizer.
    - scipy.sparse.csr_matrix: TF-IDF vectors of job descriptions, row-aligned
      with the returned DataFrame.
    """
    df = pd.read_csv(csv_path)
    # Grab the corpus before dropping the column; the Series stays valid.
    # (The original also read df["label"] into an unused variable — removed.)
    corpus = df["concatenated_column"]
    df.drop("concatenated_column", axis=1, inplace=True)

    vectorizer = TfidfVectorizer(stop_words='english')
    # fit + transform over the same corpus collapses to fit_transform.
    df_vect = vectorizer.fit_transform(corpus)

    return df, vectorizer, df_vect
# Import-time side effect: load and vectorize the whole job-description corpus
# once, shared by git_most_similar_job below.
# NOTE(review): assumes "all.csv" sits in the process's current working
# directory — TODO confirm this holds for every entry point that imports this
# module, otherwise import itself raises FileNotFoundError.
df, vectorizer, df_vect = fit_data(os.path.join(os.getcwd(), "all.csv") )
| 104 |
+
|
| 105 |
+
|
| 106 |
+
|
def git_most_similar_job(cv_summarize: str, number_of_jobs: int):
    """
    Finds the most similar job descriptions to the input CV summary.

    Parameters:
    - cv_summarize (str): Summary of the CV.
    - number_of_jobs (int): Number of similar job descriptions to return.

    Returns:
    - pandas.DataFrame: Rows of the module-level `df` for the top matches,
      ordered from most to least similar.
    """
    # Project the CV summary into the TF-IDF space fitted at import time.
    cv_vect = vectorizer.transform([cv_summarize])
    indices = git_indices(data=cv_summarize, cv_vect=cv_vect, df_vect=df_vect)

    # iloc on the ranked indices preserves most-similar-first ordering.
    # (Removed a leftover debug print and a stale comment about threads —
    # nothing in this module is threaded.)
    prediction_data = df.iloc[indices[:number_of_jobs]]

    return prediction_data