"""Helpers for storing course resources and assignment submissions in MongoDB.

Uploaded resources are saved to the ``resources`` collection, linked to their
course session, and their extracted text is embedded with OpenAI and stored in
``vectors_collection``.
"""
from pymongo import MongoClient
from datetime import datetime
import openai
import google.generativeai as genai
import streamlit as st
from db import courses_collection2, faculty_collection, students_collection, vectors_collection
from PIL import Image
import PyPDF2
import docx
import io
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
from bson import ObjectId
from dotenv import load_dotenv
import os
from create_course import courses_collection

load_dotenv()
MONGO_URI = os.getenv('MONGO_URI')
OPENAI_KEY = os.getenv('OPENAI_KEY')
GEMINI_KEY = os.getenv('GEMINI_KEY')

client = MongoClient(MONGO_URI)
db = client['novascholar_db']
resources_collection = db['resources']

# Configure APIs
openai.api_key = OPENAI_KEY
genai.configure(api_key=GEMINI_KEY)
model = genai.GenerativeModel('gemini-pro')


def upload_resource(course_id, session_id, file_name, file_content, material_type):
    """Store an uploaded file as a resource and attach it to a course session.

    If a resource with the same ``session_id`` and ``file_name`` already
    exists, its ``_id`` is returned and nothing is written.

    Args:
        course_id: Identifier of the course the resource belongs to.
        session_id: Identifier of the session within that course.
        file_name: Name under which the file is stored.
        file_content: Uploaded file object; must provide ``type``,
            ``getvalue()``, ``seek()`` and ``read()`` (presumably a Streamlit
            ``UploadedFile`` -- confirm against callers).
        material_type: Caller-supplied label stored with the resource.

    Returns:
        The ``_id`` of the existing or newly inserted resource document.
    """
    # Check for a duplicate first so re-uploads skip text extraction entirely.
    existing_resource = resources_collection.find_one(
        {"session_id": session_id, "file_name": file_name}
    )
    if existing_resource:
        return existing_resource["_id"]

    # Extract text content from the file (None when extraction failed).
    text_content = extract_text_from_file(file_content)

    # Reset the file pointer to the beginning before reading the raw bytes.
    file_content.seek(0)
    original_file_content = file_content.read()

    resource_id = ObjectId()
    resource_data = {
        "_id": resource_id,
        "course_id": course_id,
        "session_id": session_id,
        "file_name": file_name,
        "file_type": file_content.type,
        "text_content": text_content,
        "file_content": original_file_content,  # Store the original file content
        "material_type": material_type,
        "uploaded_at": datetime.utcnow(),
    }
    resources_collection.insert_one(resource_data)

    # Link the new resource to the matching session's pre-class resources.
    courses_collection.update_one(
        {"course_id": course_id, "sessions.session_id": session_id},
        {"$push": {"sessions.$.pre_class.resources": resource_id}},
    )

    # Only embed when there is text; empty or failed extraction is skipped.
    if text_content:
        create_vector_store(text_content, resource_id)
    return resource_id


def assignment_submit(student_id, course_id, session_id, assignment_id,
                      file_name, file_content, text_content, material_type):
    """Append a student's submission to an assignment inside a course session.

    Args:
        student_id: Identifier of the submitting student.
        course_id: Identifier of the course.
        session_id: Identifier of the session holding the assignment.
        assignment_id: ``id`` of the assignment being submitted to.
        file_name: Name of the submitted file.
        file_content: Uploaded file object providing ``type``, ``seek()`` and
            ``read()``.
        text_content: Text of the submission, stored as given.
        material_type: Caller-supplied label stored with the submission.

    Returns:
        ``True`` if the submission was pushed onto an assignment, ``False`` if
        no matching assignment was updated or the database call raised.
    """
    # Reset the file pointer to the beginning before reading the raw bytes.
    file_content.seek(0)
    original_file_content = file_content.read()

    assignment_data = {
        "student_id": student_id,
        "course_id": course_id,
        "session_id": session_id,
        "assignment_id": assignment_id,
        "file_name": file_name,
        "file_type": file_content.type,
        "file_content": original_file_content,  # Store the original file content
        "text_content": text_content,
        "material_type": material_type,
        "submitted_at": datetime.utcnow(),
        "file_url": "sample_url",
    }

    try:
        result = courses_collection2.update_one(
            {
                "course_id": course_id,
                "sessions.session_id": session_id,
                "sessions.post_class.assignments.id": assignment_id,
            },
            {
                "$push": {
                    "sessions.$.post_class.assignments.$[assignment].submissions": assignment_data
                }
            },
            array_filters=[{"assignment.id": assignment_id}],
        )
    except Exception as db_error:
        print(f"Error saving submission: {str(db_error)}")
        return False

    # update_one does not raise when nothing matches; without this check a
    # submission that was never stored would still be reported as saved.
    if result.modified_count == 0:
        print("Error saving submission: no matching assignment was updated")
        return False
    return True


def extract_text_from_file(uploaded_file):
    """Extract plain text from an uploaded TXT, PDF or DOCX file.

    Args:
        uploaded_file: File object providing ``type`` (a MIME type string) and
            ``getvalue()`` returning the raw bytes.

    Returns:
        The extracted text, ``""`` for an unsupported MIME type, or ``None``
        if extraction raised (the error is shown via ``st.error``).
    """
    file_type = uploaded_file.type
    try:
        if file_type == "text/plain":
            return uploaded_file.getvalue().decode("utf-8")

        if file_type == "application/pdf":
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))
            # A page with no extractable text may yield None; treat it as
            # empty rather than failing the whole document.
            return "".join(
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            )

        if file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = docx.Document(io.BytesIO(uploaded_file.getvalue()))
            return "".join(para.text + "\n" for para in doc.paragraphs)

        return ""
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None


def get_embedding(text):
    """Return the OpenAI ``text-embedding-ada-002`` embedding for ``text``."""
    response = openai.embeddings.create(
        model="text-embedding-ada-002",
        input=text,
    )
    return response.data[0].embedding


def create_vector_store(text, resource_id):
    """Embed ``text`` and store the vector for ``resource_id``.

    Does nothing if a vector document with the same ``resource_id`` and
    ``text`` already exists.

    Args:
        text: Text to embed and store alongside the vector.
        resource_id: ``_id`` of the resource the text was extracted from.
    """
    existing_vector = vectors_collection.find_one(
        {"resource_id": resource_id, "text": text}
    )
    if existing_vector:
        print(f"Vector already exists for Resource ID: {resource_id}")
        return

    print(f"In Vector Store method, Resource ID is: {resource_id}")

    vector_data = {
        "resource_id": resource_id,
        "vector": get_embedding(text),
        "text": text,
        "created_at": datetime.utcnow(),
    }
    vectors_collection.insert_one(vector_data)