# Hugging Face Spaces status header (scrape artifact): Spaces: Sleeping
"""FastAPI service wiring: course search, resume upload, and resume Q&A."""
import asyncio
import os
from io import BytesIO

import docx
import fitz
from dotenv import load_dotenv
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
from google import genai

from utils import (
    google_search,
    split_text_into_chunks,
    insert_embeddings_into_pinecone_database,
    query_vector_database,
    generate_embedding_for_user_resume,
    delete_vector_namespace,
)

# Pull secrets from the local .env file into the process environment
# before any of them are read below.
load_dotenv()

CX = os.getenv("SEARCH_ENGINE_ID")      # Google Programmable Search engine id
API_KEY = os.getenv("GOOGLE_API_KEY")   # Google Custom Search API key
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

app = FastAPI()
def get_course(query):
    """Search Google Custom Search for courses matching *query*.

    Returns a 200 JSONResponse wrapping a list of
    ``{Course_Title, Course_Link, Course_Snippet}`` dicts — an empty list
    when the search returns nothing.
    """
    results = google_search(query, API_KEY, CX)
    items = results.get('items', []) if results else []
    content = [
        {
            "Course_Title": item.get('title'),
            "Course_Link": item.get('link'),
            "Course_Snippet": item.get('snippet'),
        }
        for item in items
    ]
    return JSONResponse(content, status_code=200)
async def upload_file(user_id, file: UploadFile = File(...)):
    """Extract text from an uploaded PDF/DOCX resume, embed it, and upsert it.

    Args:
        user_id: Pinecone namespace the embeddings are stored under.
        file: Uploaded resume; only ``.pdf`` and ``.docx`` are supported.

    Returns:
        ``{"filename": ..., "response": ...}`` on success, or a 400
        JSONResponse for unsupported file types.
    """
    content = await file.read()  # whole upload as bytes

    # Debug trace of the incoming upload.
    print(f"File name: {file.filename}")
    print(f"File content type: {file.content_type}")
    print(f"File size: {file.size} bytes")

    # Use the LAST dot-separated part, lowercased, so names like
    # "john.doe.PDF" work; the original `split('.')[1]` broke on
    # multi-dot names and raised IndexError on extension-less ones.
    extension = file.filename.rsplit('.', 1)[-1].lower()

    if extension == "pdf":
        extracted_text = ""
        pdf_document = fitz.open(stream=BytesIO(content), filetype="pdf")
        try:
            for page_num in range(pdf_document.page_count):
                extracted_text += pdf_document.load_page(page_num).get_text()
        finally:
            pdf_document.close()  # release the underlying buffer
    elif extension == "docx":
        doc = docx.Document(BytesIO(content))
        extracted_text = ""
        for para in doc.paragraphs:
            extracted_text += para.text + "\n"
    else:
        # The original fell through with `extracted_text` undefined
        # (UnboundLocalError); reject unsupported types explicitly instead.
        return JSONResponse(
            {"error": f"Unsupported file type: {file.filename}"},
            status_code=400,
        )

    sentences = split_text_into_chunks(extracted_text, chunk_size=200)
    docs = generate_embedding_for_user_resume(data=sentences, user_id=file.filename)
    response = insert_embeddings_into_pinecone_database(
        doc=docs, api_key=PINECONE_API_KEY, name_space=user_id
    )
    return {"filename": file.filename, "response": str(response)}
def ask_ai_about_resume(query, user_id):
    """Answer *query* about a stored resume using Gemini.

    Relevant resume chunks are pulled from the vector store (namespace ==
    ``user_id``) and handed to ``gemini-2.0-flash`` as grounding context.
    """
    # Fetch the resume context that matches the question.
    context = query_vector_database(query=query, api_key=PINECONE_API_KEY, name_space=user_id)

    # Make sure this thread has an event loop before constructing the
    # Gemini client — worker threads don't get one by default.
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    client = genai.Client(api_key=GEMINI_API_KEY)
    answer = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=f"""
        Answer this question using the context provided:
        question: {query}
        context: {context}
        """,
    )
    return {"Ai Response": answer.text}