Spaces:

SPJIMR-Internship
/

SPJIMR_FlipClassroom_RCopilot_ResearchInternship

Sleeping

SPJIMR_FlipClassroom_RCopilot_ResearchInternship

File size: 5,870 Bytes

e107ee4

from pymongo import MongoClient
from datetime import datetime
import openai
import google.generativeai as genai
import streamlit as st
from db import courses_collection2, faculty_collection, students_collection, vectors_collection
from PIL import Image
import PyPDF2, docx, io
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
from bson import ObjectId
from dotenv import load_dotenv
import os
from create_course import courses_collection

# Load variables from a .env file (if one is found) into the process
# environment before the os.getenv() lookups below read them.
load_dotenv()
# os.getenv returns None for an unset variable, so a missing key is not
# detected here; it only surfaces when the corresponding client is used.
MONGO_URI = os.getenv('MONGO_URI')
OPENAI_KEY = os.getenv('OPENAI_KEY')
GEMINI_KEY = os.getenv('GEMINI_KEY')


# Module-level MongoDB handles: uploaded resources are stored in the
# 'resources' collection of the 'novascholar_db' database.
client = MongoClient(MONGO_URI)
db = client['novascholar_db']
resources_collection = db['resources']

# Configure APIs
openai.api_key = OPENAI_KEY
genai.configure(api_key=GEMINI_KEY)
# NOTE(review): `model` is not referenced by any function in this section;
# presumably other code imports it -- confirm before removing.
model = genai.GenerativeModel('gemini-pro')

def upload_resource(course_id, session_id, file_name, file_content, material_type):
    """Store an uploaded file as a session resource and index its text.

    If a resource with the same ``file_name`` already exists for
    ``session_id``, nothing is written and the existing id is returned.
    Otherwise the raw bytes and the extracted text are inserted into
    ``resources_collection``, the new id is pushed onto the matching
    session's ``pre_class.resources`` list in ``courses_collection`` and,
    when any text was extracted, an embedding is stored through
    ``create_vector_store``.

    Args:
        course_id: Identifier of the course that owns the session.
        session_id: Identifier of the session the resource belongs to.
        file_name: Name the resource is stored and de-duplicated under.
        file_content: Uploaded file object; must expose ``type``,
            ``getvalue()``, ``seek()`` and ``read()`` (presumably a
            Streamlit ``UploadedFile`` -- confirm against callers).
        material_type: Caller-supplied label stored with the resource.

    Returns:
        The ``_id`` of the existing or newly inserted resource document.
    """
    # Look for a duplicate first so a repeated upload does not pay for text
    # extraction (or surface its error banner) only to discard the result.
    existing_resource = resources_collection.find_one({
        "session_id": session_id,
        "file_name": file_name
    })
    if existing_resource:
        return existing_resource["_id"]

    # May be None (extraction failed) or "" (unsupported type); either way
    # the resource is still stored, just without a vector.
    text_content = extract_text_from_file(file_content)

    # Rewind before reading so the stored bytes are the complete file.
    file_content.seek(0)
    original_file_content = file_content.read()

    resource_id = ObjectId()
    resource_data = {
        "_id": resource_id,
        "course_id": course_id,
        "session_id": session_id,
        "file_name": file_name,
        "file_type": file_content.type,
        "text_content": text_content,
        "file_content": original_file_content,  # Store the original file content
        "material_type": material_type,
        "uploaded_at": datetime.utcnow()
    }
    resources_collection.insert_one(resource_data)

    # Attach the new resource to the session's pre-class material list.
    courses_collection.update_one(
        {
            "course_id": course_id,
            "sessions.session_id": session_id
        },
        {
            "$push": {"sessions.$.pre_class.resources": resource_id}
        }
    )

    if text_content:
        create_vector_store(text_content, resource_id)
    return resource_id

def assignment_submit(student_id, course_id, session_id, assignment_id,  file_name, file_content, text_content, material_type):
    """Append a student's submission to an assignment inside a course session.

    The submission (raw file bytes, supplied text and metadata) is pushed
    onto the ``submissions`` array of the assignment whose ``id`` equals
    ``assignment_id``, within the matched session of the course document in
    ``courses_collection2``.

    Args:
        student_id: Identifier of the submitting student.
        course_id: Identifier of the course document to update.
        session_id: Identifier of the session holding the assignment.
        assignment_id: ``id`` of the assignment receiving the submission.
        file_name: Name of the submitted file.
        file_content: Uploaded file object; must expose ``type``,
            ``seek()`` and ``read()``.
        text_content: Text of the submission, as provided by the caller.
        material_type: Caller-supplied label stored with the submission.

    Returns:
        True if a document was actually modified; False if the database
        call raised or no course/session/assignment matched.
    """
    # Rewind before reading so the stored bytes are the complete file.
    file_content.seek(0)
    original_file_content = file_content.read()

    assignment_data = {
        "student_id": student_id,
        "course_id": course_id,
        "session_id": session_id,
        "assignment_id": assignment_id,
        "file_name": file_name,
        "file_type": file_content.type,
        "file_content": original_file_content,  # Store the original file content
        "text_content": text_content,
        "material_type": material_type,
        "submitted_at": datetime.utcnow(),
        "file_url": "sample_url"
    }
    try:
        result = courses_collection2.update_one(
            {
                "course_id": course_id,
                "sessions.session_id": session_id,
                "sessions.post_class.assignments.id": assignment_id
            },
            {
                "$push": {"sessions.$.post_class.assignments.$[assignment].submissions": assignment_data}
            },
            array_filters=[{"assignment.id": assignment_id}]
        )
        # update_one does not raise when nothing matches, so success has to
        # be read from the result rather than from the absence of an error.
        saved = result.modified_count > 0
    except Exception as db_error:
        print(f"Error saving submission: {str(db_error)}")
        return False

    if not saved:
        print("Error saving submission: no matching course, session or assignment was updated")
    return saved

def extract_text_from_file(uploaded_file):
    """Extract plain text from an uploaded TXT, PDF or DOCX file.

    Args:
        uploaded_file: Object exposing ``type`` (a MIME type string) and
            ``getvalue()`` returning the file's raw bytes.

    Returns:
        The extracted text, with each PDF page / DOCX paragraph followed by
        a newline; ``""`` when the MIME type is not one of the three
        supported ones; ``None`` when parsing raised, after the error has
        been reported through ``st.error``.
    """
    file_type = uploaded_file.type

    try:
        if file_type == "text/plain":
            return uploaded_file.getvalue().decode("utf-8")
        if file_type == "application/pdf":
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))
            # Defensive `or ""`: a page that yields no text must not abort
            # extraction for the whole document.
            return "".join(
                f"{page.extract_text() or ''}\n" for page in pdf_reader.pages
            )
        if file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = docx.Document(io.BytesIO(uploaded_file.getvalue()))
            return "".join(f"{para.text}\n" for para in doc.paragraphs)
        # Unsupported type: nothing to extract.
        return ""
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None

def get_embedding(text):
    """Return the embedding of ``text`` from OpenAI's embeddings endpoint.

    Sends ``text`` as the input of a ``text-embedding-ada-002`` request and
    returns the ``embedding`` attribute of the first item in the response's
    ``data`` list. Errors raised by the client are not caught here.
    """
    request_args = {"model": "text-embedding-ada-002", "input": text}
    first_result = openai.embeddings.create(**request_args).data[0]
    return first_result.embedding

def create_vector_store(text, resource_id):
    """Embed ``text`` and store the vector for ``resource_id``.

    Does nothing (beyond printing a notice) when ``vectors_collection``
    already holds a document with the same ``resource_id`` and ``text``.
    Otherwise an embedding is requested through ``get_embedding`` and a
    document with the resource id, vector, source text and creation time is
    inserted.

    Args:
        text: Text to embed and store alongside the vector.
        resource_id: Identifier of the resource the text was extracted
            from; stored as given, with no type conversion.

    Returns:
        None.
    """
    existing_vector = vectors_collection.find_one({
        "resource_id": resource_id,
        "text": text
    })
    if existing_vector:
        print(f"Vector already exists for Resource ID: {resource_id}")
        return

    print(f"In Vector Store method, Resource ID is: {resource_id}")
    vectors_collection.insert_one({
        "resource_id": resource_id,
        "vector": get_embedding(text),
        "text": text,
        "created_at": datetime.utcnow()
    })