Spaces:

faizan23423
/

cvscreening

Sleeping

App Files Files Community

faizanmumtaz committed on Jul 12, 2024

Commit

fa525b5

1 Parent(s): 4f853b0

first commit

Browse files

Files changed (4) hide show

app.py +79 -0
dockerfile +23 -0
main.py +93 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from fastapi import FastAPI, File, UploadFile, HTTPException, Form,BackgroundTasks
+from fastapi.responses import FileResponse
+import shutil,os,uuid,asyncio
+from zipfile import ZipFile
+from typing import List
+app = FastAPI()
+import logging
+# Configure logging: only records at ERROR level or above are written to app.log.
+logging.basicConfig(
+    filename='app.log',  # Log file path
+    level=logging.ERROR,  # Minimum severity level to log (e.g., ERROR, WARNING, INFO)
+    format='%(asctime)s %(levelname)s: %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S')
+# Get the secret key from the SECRET_KEY environment variable. The previous
+# hard-coded value is kept as the fallback so existing clients keep working
+# when the variable is unset or empty; set the variable in any real deployment.
+SECRET_KEY = os.environ.get("SECRET_KEY") or "123"
+def cleanup_directory(path: str):
+    """Delete the directory tree at `path` on a best-effort basis.
+
+    This runs both as a post-response background task and inside the error
+    handler of upload_files, so it must never raise: a directory that is
+    already gone (or only partly removable) is silently tolerated.
+
+    Args:
+        path: Directory to remove together with everything inside it.
+    """
+    shutil.rmtree(path, ignore_errors=True)
+@app.post("/uploadfiles/")
+def upload_files(
+    background_tasks: BackgroundTasks,
+    files: List[UploadFile] = File(...),
+    api_key: str = Form(...),
+    percentage: int = Form(...),
+    job_description : str = Form(...)):
+    """Score uploaded PDF CVs against a job description and return the best ones as a zip.
+
+    Args:
+        background_tasks: Used to delete the per-request working directory after
+            the response has been sent.
+        files: The uploaded CVs; between 5 and 900 files, all named "*.pdf".
+        api_key: Must equal SECRET_KEY.
+        percentage: Share of CVs to shortlist, 0-100.
+        job_description: Text every CV is scored against.
+
+    Returns:
+        A FileResponse streaming a zip archive of the shortlisted PDFs.
+
+    Raises:
+        HTTPException: 401 for a wrong api_key, 400 for invalid input, 500 when
+            saving, scoring or zipping fails.
+    """
+    min_files, max_files = 5, 900
+    if api_key != SECRET_KEY:
+        raise HTTPException(status_code=401, detail="Unauthorized")
+    if not (min_files <= len(files) <= max_files):
+        # The message is built from the same bounds the check uses so the two cannot drift apart.
+        raise HTTPException(status_code=400, detail=f"Number of files must be between {min_files} and {max_files}.")
+    if percentage < 0 or percentage > 100:
+        raise HTTPException(status_code=400, detail="Percentage must be between 0 and 100.")
+    # Validate all files are PDFs (an upload without a filename is rejected as well)
+    for file in files:
+        if not file.filename or not file.filename.lower().endswith(".pdf"):
+            raise HTTPException(status_code=400, detail="All files must be PDF.")
+    main_path = f"data/resumes/{str(uuid.uuid4())}"
+    # Create the directory if it doesn't exist
+    os.makedirs(main_path, exist_ok=True)
+    try:
+        pdfs_paths = []
+        for file in files:
+            # The filename is client-controlled: keep only its last path component
+            # so a name such as "../../x.pdf" cannot be written outside main_path.
+            file_location = f"{main_path}/{os.path.basename(file.filename)}"
+            with open(file_location, "wb") as buffer:
+                shutil.copyfileobj(file.file, buffer)
+            pdfs_paths.append(file_location)
+        pdfs_path_with_description = [{"pdf_path":pdf_path,"job_des":job_description} for pdf_path in pdfs_paths]
+        # compression is defined in main.py; it is imported here rather than at module
+        # level, as before, so the LLM clients are only built on the first request.
+        from main import compression
+        short_listed_files_paths = compression(pdfs_path_with_description,percentage)
+        zip_filename = f"{main_path}/{str(uuid.uuid4())}.zip"
+        with ZipFile(zip_filename, 'w') as zipf:
+            for file_path in short_listed_files_paths:
+                zipf.write(file_path, os.path.basename(file_path))
+        # Return the file response and clean up after sending
+        background_tasks.add_task(cleanup_directory, main_path)
+        # Offer the download under the bare archive name, not the server-side path.
+        return FileResponse(zip_filename, media_type='application/zip', filename=os.path.basename(zip_filename))
+    except Exception as e:
+        # logging.exception records the traceback along with the message.
+        logging.exception("Error occurred: %s", e)
+        cleanup_directory(main_path)
+        raise HTTPException(status_code=500, detail=str(e)) from e

dockerfile ADDED Viewed

	@@ -0,0 +1,23 @@

+# Use the official Python image from the Docker Hub
+FROM python:3.12-slim
+# Set the working directory in the container
+WORKDIR /app
+# Copy the requirements file into the container at /app
+COPY requirements.txt .
+# Install the dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application code into the container at /app
+COPY . .
+# Expose the port that the app runs on
+EXPOSE 7860
+# Command to run the Uvicorn server (the FastAPI instance `app` is created in app.py)
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+# Example run. Pass API keys in from your own environment and never commit real
+# keys to this file; the published port must match the 7860 the server listens on.
+# docker run -d --rm --name "pdfsummary" -e dgoogle_api_key="$GOOGLE_API_KEY" -e google_api_key="$GOOGLE_API_KEY" -p 7860:7860 cvscreening:latest

main.py ADDED Viewed

	@@ -0,0 +1,93 @@

+from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
+from langchain_core.output_parsers.openai_tools import PydanticToolsParser
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.pydantic_v1 import BaseModel, Field
+from langchain.document_loaders import PyPDFLoader
+from langchain_core.prompts import PromptTemplate
+from langchain_core.documents import Document
+from langchain.prompts import PromptTemplate
+from langchain_groq import ChatGroq
+import os,warnings
+warnings.filterwarnings("ignore")
+# Structured-output schema: bound to the model as a tool (bind_tools) and parsed
+# back out of the model's reply by PydanticToolsParser.
+# NOTE(review): the class docstring below presumably doubles as the tool
+# description the model sees, i.e. it is prompt text -- confirm before rewording it.
+class cv_score(BaseModel):
+    """You are a powerful HR assistant. Your task is to review the given CV and determine if it matches the job requirements specified in the job description, and give a score between 0 and 1 based on their relevancy. Please do your best; it is very important for my career. If both or any of the fields are empty, then also return 0. Also, return the matching score between 0 and 1.
+    > Relevant Score Between (0 and 1):
+    """
+    # Declared as str; process_score converts it to float and substitutes 0.50 when that fails.
+    score: str = Field(..., description="Give a score to the CV between 0 and 1")
+# API keys are taken from the environment when set. The variable names are the
+# ones used by the example `docker run -e ...` line in the dockerfile.
+# NOTE(review): which of the two variables was meant for the fallback client is
+# an assumption -- confirm.
+# SECURITY: the literal defaults below are the keys that were previously
+# hard-coded here. They are kept only so existing deployments keep working, but
+# they are exposed in version control: rotate them and rely on the environment.
+google = ChatGoogleGenerativeAI(temperature=0,model="gemini-1.5-flash",google_api_key=os.environ.get("dgoogle_api_key") or "l1Jm8jpIpq6IvfqES5LEDUQp9CkSINmjiwYoLNtD")
+# Primary client; requests that fail on it are retried on `google`.
+google2 = ChatGoogleGenerativeAI(temperature=0,model="gemini-1.5-flash",google_api_key=os.environ.get("google_api_key") or "AIzaSyARfxSKQwobd0MNuOAt6yUjmNUFGX4k_eI").with_fallbacks([google])
+# ,tool_choice="predict_bool"
+# Offer cv_score to the model as a tool so its answer can be parsed as a structured score.
+llm_with_tools = google2.bind_tools([cv_score])
+def get_data(file_path):
+    """Load a PDF and return its whole text as a single Document.
+
+    Args:
+        file_path: Path of the PDF, handed to PyPDFLoader.
+
+    Returns:
+        - The loader's only Document when it yields exactly one.
+        - Otherwise a new Document whose page_content is every loaded text
+          concatenated in order (no separator) and whose metadata is the loaded
+          metadata merged in order, later entries overwriting earlier ones.
+        - When the loader yields nothing, an empty Document carrying only a
+          "source" entry instead of an IndexError.
+    """
+    pages = PyPDFLoader(file_path=file_path).load()
+    if not pages:
+        # process_data reads metadata["source"], so that key must exist even here.
+        return Document(page_content="", metadata={"source": file_path})
+    if len(pages) == 1:
+        return pages[0]
+    metadata = {}
+    for page in pages:
+        metadata.update(page.metadata)
+    return Document(
+        page_content="".join(page.page_content for page in pages),
+        metadata=metadata)
+def process_score(cv_score):
+    """Turn the parsed tool output into a float score.
+
+    Args:
+        cv_score: Sequence of parsed results; only the first element's `score`
+            attribute is used.
+
+    Returns:
+        The score as a float, or the neutral fallback 0.50 when there is no
+        result, the value cannot be converted, or it is not a finite number.
+    """
+    import math
+    fallback = 0.50
+    try:
+        score = float(cv_score[0].score)
+    except Exception:
+        # Deliberately broad: any malformed model output gets the neutral score.
+        return fallback
+    # "nan" and "inf" convert without error but would corrupt the later sort by score.
+    return score if math.isfinite(score) else fallback
+# Prompt sent to the model: the job description and the CV text, each wrapped in
+# its own tag pair. The {job_des} and {cv} placeholders are filled from the
+# "job_des" and "cv" keys of the dict built by process_data.
+template = PromptTemplate.from_template("""
+<job description>
+{job_des}
+</job description>
+------------
+<cv>
+{cv}
+</cv>
+""")
+# Scoring pipeline: prompt -> tool-calling LLM -> parsed cv_score objects -> float via process_score.
+cv_score_cal = template | llm_with_tools | PydanticToolsParser(tools=[cv_score]) | process_score
+def process_data(dict_input: dict) -> dict:
+    """Flatten a loaded CV and its job description into one plain dict.
+
+    Args:
+        dict_input: Mapping with "pdf_data" (an object exposing `metadata` and
+            `page_content`) and "job_des".
+
+    Returns:
+        A dict with "source" (taken from the document metadata), "cv" (the
+        document text) and "job_des" (passed through unchanged).
+    """
+    document = dict_input["pdf_data"]
+    source, text = document.metadata["source"], document.page_content
+    return dict(source=source, cv=text, job_des=dict_input["job_des"])
+# NOTE(review): pdfs_dir and job_description are not referenced anywhere else in
+# the code shown here -- possibly leftovers from local testing; confirm before removing.
+pdfs_dir = "data"
+job_description = "HI looking for python developer"
+# Per-CV pipeline. Input: a dict with "pdf_path" and "job_des".
+#   1. Build {"pdf_data": get_data(pdf_path), "job_des": job_des}.
+#   2. process_data flattens that into {"source", "cv", "job_des"}.
+#   3. assign() adds "cv_score", computed by cv_score_cal from that same dict.
+content_chain = RunnableParallel(
+    {
+        "pdf_data": lambda x: get_data(x["pdf_path"]),
+        "job_des": lambda x: x["job_des"]
+    }
+) | RunnableLambda(process_data) | RunnablePassthrough.assign(cv_score = (lambda x:x) | cv_score_cal)
+def shortlist_cvs(scored_cvs:list[dict],percentage:int) -> list[str]:
+    """Return the sources of the top `percentage` percent of CVs, best score first.
+
+    Args:
+        scored_cvs: Dicts carrying "source" and "cv_score"; a missing score
+            counts as 0. The list itself is not modified.
+        percentage: Share of CVs to keep. Values <= 0 select nothing; values
+            >= 100 select everything.
+
+    Returns:
+        The "source" value of each shortlisted CV, in descending score order
+        (ties keep their input order). A positive percentage always yields at
+        least one CV when any were given, so small batches do not round to nothing.
+    """
+    ranked = sorted(scored_cvs, key=lambda cv: cv.get("cv_score", 0), reverse=True)
+    # Clamp at zero: a negative count would turn the slice below into "all but the last N".
+    shortlist_count = max(0, int(len(ranked) * percentage / 100))
+    if shortlist_count == 0 and percentage > 0 and ranked:
+        shortlist_count = 1
+    return [cv.get("source") for cv in ranked[:shortlist_count]]
+def compression(pdfs_path:list[dict],percentage:int):
+    """Score a batch of CVs and return the sources of the shortlisted ones.
+
+    Args:
+        pdfs_path: One dict per CV with "pdf_path" and "job_des", as consumed by
+            content_chain.
+        percentage: Share of CVs to keep, forwarded to shortlist_cvs.
+
+    Returns:
+        The list produced by shortlist_cvs for the scored batch.
+    """
+    # Run the whole batch through the scoring chain, then rank and cut.
+    return shortlist_cvs(content_chain.batch(pdfs_path), percentage)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi==0.111.0
+langchain==0.2.7
+langchain_community==0.2.7
+langchain_core==0.2.12
+langchain_google_genai==1.0.7
+langchain_groq==0.1.6
+uvicorn==0.30.1
+pypdf