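# NOTE: "Spaces: Running" is hosting-page status text that was captured together with this file; it is not part of the program.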
from io import BytesIO | |
from dotenv import load_dotenv | |
import os | |
from utils import google_search,split_text_into_chunks,insert_embeddings_into_pinecone_database,query_vector_database,generate_embedding_for_user_resume,delete_vector_namespace,create_user,login_user | |
from fastapi import FastAPI, File, UploadFile | |
from fastapi.responses import JSONResponse | |
import docx | |
import fitz | |
from scraper import scrapeCourse | |
import asyncio | |
from google import genai | |
from pydantic import BaseModel | |
# Read a local .env file into the process environment before the lookups
# below run.
load_dotenv()

# Identifiers and credentials for the external services this module talks to.
# os.getenv returns None for any variable that is not set.
CX = os.getenv("SEARCH_ENGINE_ID")
API_KEY = os.getenv("GOOGLE_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
MONGO_URI = os.getenv("MONGO_URI")

# The FastAPI application object for this service.
app = FastAPI()
import re | |
class UserBody(BaseModel):
    """Request payload holding the credentials consumed by login and signUp."""

    Email: str
    Password: str
class AiAnalysis(BaseModel):
    """Request payload for a question answered from a user's stored context."""

    # Used as the vector-database namespace when context is fetched.
    UserId: str
    # The question itself; also the query sent to the vector database.
    Query: str
class UserCourse(BaseModel):
    """Questionnaire answers that are interpolated into the course-recommendation prompt."""

    # "Which of the following best describes you?"
    EmploymentStatus: str
    # "Would you like to prepare for an interim role ... while pursuing your dream job?"
    InterimRole: str
    # "What is your desired role?"
    DesiredRole: str
    # "Why do you want to achieve this desired role?"
    Motivation: str
    # "How do you prefer to learn new skills?"
    LearningPreference: str
    # "How many hours per day can you dedicate to learning?"
    HoursSpentLearning: str
    # "What are the biggest challenges or obstacles you face in reaching your dream role?"
    Challenges: str
    # "What is your ideal timeframe for achieving your dream role?"
    TimeframeToAchieveDreamRole: str
    # NOTE(review): not read by the recommendation code visible in this file.
    userId: str
class CourseRecommendation(BaseModel):
    """Course name and completion estimate pulled out of a model reply."""

    # Either field is the literal string "Unknown" when extract_course_info
    # cannot find a value.
    CourseName: str
    CompletionTime: str
# Compiled once at import time. Matching ignores case so that keys such as
# "CourseName" or a value like "3-6 Months" are still recognised.
_COURSE_NAME_RE = re.compile(r'"coursename"\s*:\s*"([^"]+)"', re.IGNORECASE)
_TIME_RANGE_RE = re.compile(r"(\d+\s*-\s*\d+\s*months)", re.IGNORECASE)
_COMPLETION_TIME_RE = re.compile(r'"completiontime"\s*:\s*"([^"]+)"', re.IGNORECASE)


def extract_course_info(text: str) -> CourseRecommendation:
    """Pull the recommended course name and completion time out of a model reply.

    The reply is expected to contain ``"coursename": "..."`` and
    ``"completiontime": "..."`` pairs, possibly surrounded by other text.

    Args:
        text: Raw reply text. ``None`` or an empty string is treated as a
            reply with nothing to extract.

    Returns:
        A CourseRecommendation. A field is the literal string ``"Unknown"``
        when no value for it can be found.
    """
    text = text or ""

    course_match = _COURSE_NAME_RE.search(text)
    coursename = course_match.group(1).strip() if course_match else "Unknown"

    # An "N-M months" range anywhere in the reply wins, as it always has.
    # Otherwise fall back to whatever the "completiontime" key holds, so
    # answers such as "8 weeks" or "3 months" are no longer discarded.
    time_match = _TIME_RANGE_RE.search(text) or _COMPLETION_TIME_RE.search(text)
    completiontime = time_match.group(1).strip() if time_match else "Unknown"

    return CourseRecommendation(CourseName=coursename, CompletionTime=completiontime)
def get_course(query):
    """Search for courses matching ``query`` and return them as a JSON response.

    The search-and-scrape work is delegated to get_course_func so the two
    entry points cannot drift apart; this function only wraps the resulting
    list in an HTTP 200 JSONResponse.

    Args:
        query: Search text forwarded to the web search.

    Returns:
        JSONResponse whose body is a list of dicts with the keys
        ``Course_Title``, ``Course_Link``, ``Course_Snippet`` and
        ``Scraped_Course_Details`` (an empty list when nothing is found).
    """
    return JSONResponse(get_course_func(query), status_code=200)
def get_course_func(query):
    """Search the web for ``query`` and return one dict per search hit.

    Each dict carries the hit's title, link and snippet together with the
    result of scraping the linked page via scrapeCourse. An empty list is
    returned when the search yields nothing.
    """
    search_results = google_search(query, API_KEY, CX)
    if not search_results:
        return []

    courses = []
    for entry in search_results.get("items", []):
        title = entry.get("title")
        url = entry.get("link")
        snippet = entry.get("snippet")
        courses.append(
            {
                "Course_Title": title,
                "Course_Link": url,
                "Course_Snippet": snippet,
                "Scraped_Course_Details": scrapeCourse(url=url),
            }
        )
    return courses
async def upload_file(user_id, file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF or DOCX file and index it.

    The text is split into chunks, embedded, and the embeddings are inserted
    into the Pinecone namespace named by ``user_id``.

    Args:
        user_id: Namespace under which the embeddings are inserted.
        file: The uploaded document; its type is decided from the filename
            extension (case-insensitive).

    Returns:
        ``{"filename": ..., "response": ...}`` on success, where ``response``
        is the stringified result of the insert call. For any other file
        type, a JSONResponse with status 400 describing the problem.
    """
    content = await file.read()  # raw bytes of the upload

    print(f"File name: {file.filename}")
    print(f"File content type: {file.content_type}")
    print(f"File size: {file.size} bytes")

    # splitext looks at the text after the LAST dot, so "cv.final.pdf" is
    # treated as a PDF, and a name without any dot yields "" instead of
    # raising IndexError.
    extension = os.path.splitext(file.filename or "")[1].lstrip(".").lower()

    if extension == "pdf":
        # The context manager closes the document once its text is read.
        with fitz.open(stream=BytesIO(content), filetype="pdf") as pdf_document:
            extracted_text = "".join(
                pdf_document.load_page(page_num).get_text()
                for page_num in range(pdf_document.page_count)
            )
    elif extension == "docx":
        doc = docx.Document(BytesIO(content))
        extracted_text = "".join(para.text + "\n" for para in doc.paragraphs)
    else:
        # Previously this path fell through and failed on an unbound
        # extracted_text; report the problem to the client instead.
        return JSONResponse(
            {"error": f"Unsupported file type for {file.filename!r}; upload a .pdf or .docx file."},
            status_code=400,
        )

    sentences = split_text_into_chunks(extracted_text, chunk_size=200)
    # NOTE(review): the filename is what gets passed as user_id here, while the
    # insert below uses the real user_id as namespace — confirm this is intended.
    docs = generate_embedding_for_user_resume(data=sentences, user_id=file.filename)
    response = insert_embeddings_into_pinecone_database(
        doc=docs, api_key=PINECONE_API_KEY, name_space=user_id
    )
    return {"filename": file.filename, "response": str(response)}
def ask_ai_about_resume(req: AiAnalysis):
    """Answer ``req.Query`` with Gemini, using context from the vector database.

    Context is fetched from the namespace named by ``req.UserId`` and placed
    in the prompt next to the question. The reply text is returned under the
    ``Ai_Response`` key.

    NOTE(review): a function defined later in this file uses the same name,
    so at module level that later definition replaces this one.
    """
    context = query_vector_database(
        query=req.Query, api_key=PINECONE_API_KEY, name_space=req.UserId
    )

    # Make sure this thread has an event loop before the Gemini client is built.
    try:
        asyncio.get_event_loop()
    except RuntimeError:
        asyncio.set_event_loop(asyncio.new_event_loop())

    gemini = genai.Client(api_key=GEMINI_API_KEY)
    prompt = f"""
    Answer this question using the context provided:
    question: {req.Query}
    context: {context}
    """
    answer = gemini.models.generate_content(model="gemini-2.0-flash", contents=prompt)
    return {"Ai_Response": answer.text}
def ask_ai_about_resume(request: UserCourse):
    """Recommend a course from the user's questionnaire answers and search for it.

    The answers are interpolated into a Gemini prompt that asks for a JSON
    object with ``coursename`` and ``completiontime``. The reply is parsed by
    extract_course_info, and the extracted course name is then used as a web
    search query via get_course_func.

    Args:
        request: Questionnaire answers. The fields read here are
            EmploymentStatus, InterimRole, DesiredRole, Motivation,
            LearningPreference, HoursSpentLearning, Challenges and
            TimeframeToAchieveDreamRole. ``userId`` is not used.

    Returns:
        ``{"CourseInfo": CourseRecommendation, "Courses": list}``. ``Courses``
        is empty when no course name could be extracted from the reply.

    NOTE(review): an earlier function in this file has the same name; this
    definition replaces it at module level.
    """
    # Make sure this thread has an event loop before the Gemini client is built.
    try:
        asyncio.get_event_loop()
    except RuntimeError:
        asyncio.set_event_loop(asyncio.new_event_loop())

    client = genai.Client(api_key=GEMINI_API_KEY)
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=f"""
    please respond with a JSON object that contains the following keys as a response:
    - "coursename": the name of the recommended course,
    - "completiontime": an estimate of how long it would take to complete the course.
    Do not include any extra text.
    Recommend a course using this information below :
    Which of the following best describes you?: {request.EmploymentStatus}
    Would you like to prepare for an interim role to gain experience and income while pursuing your dream job?: {request.InterimRole}
    What is your desired role?: {request.DesiredRole}
    Why do you want to achieve this desired role?: {request.Motivation}
    How do you prefer to learn new skills?: {request.LearningPreference}
    How many hours per day can you dedicate to learning?: {request.HoursSpentLearning}
    What are the biggest challenges or obstacles you face in reaching your dream role?: {request.Challenges}
    What is your ideal timeframe for achieving your dream role?: {request.TimeframeToAchieveDreamRole}
    """,
    )

    course_info = extract_course_info(response.text)

    # "Unknown" is the placeholder extract_course_info uses when it finds no
    # course name. Searching and scraping for that literal word would only
    # return unrelated pages, so skip the lookup in that case.
    if course_info.CourseName == "Unknown":
        courses = []
    else:
        courses = get_course_func(query=course_info.CourseName)

    return {"CourseInfo": course_info, "Courses": courses}
def login(user: UserBody):
    """Hand the supplied e-mail and password to login_user and return its result.

    The lookup targets the "users" collection of the "crayonics" database.
    Whatever login_user yields is returned under the ``user_id`` key.
    """
    credentials = {"email": user.Email, "password": user.Password}
    found_id = login_user(
        db_uri=MONGO_URI,
        db_name="crayonics",
        collection_name="users",
        document=credentials,
    )
    return {"user_id": found_id}
def signUp(user: UserBody):
    """Hand the supplied e-mail and password to create_user and return its result.

    The record targets the "users" collection of the "crayonics" database.
    Whatever create_user yields is returned under the ``user_id`` key.

    NOTE(review): the password is forwarded exactly as received — confirm
    that create_user hashes it before storing.
    """
    new_account = {"email": user.Email, "password": user.Password}
    created_id = create_user(
        db_uri=MONGO_URI,
        db_name="crayonics",
        collection_name="users",
        document=new_account,
    )
    return {"user_id": created_id}