File size: 4,186 Bytes
cc3fb29
 
 
 
 
 
 
 
 
 
 
 
 
 
4480940
 
cc3fb29
4480940
 
 
cc3fb29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import json
import gradio as gr
from typing import List, Dict
from langchain.document_loaders import AirtableLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document

# Credentials, read from the process environment (None when a variable is unset)
AIRTABLE_API_KEY = os.environ.get("AIRTABLE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Airtable coordinates: which base, table and view to read
base_id, table_id, view = (
    os.environ.get(setting) for setting in ("base_id", "table_id", "view")
)

def load_airtable_data() -> List[Dict]:
    """Load data from Airtable and return it as a list of dictionaries.

    The loader is built from the ``AIRTABLE_API_KEY`` environment variable and
    the module-level ``table_id``, ``base_id`` and ``view`` settings. The
    ``page_content`` of each loaded document is decoded in this order:

    1. as JSON;
    2. as a Python literal that evaluates to a JSON-serialisable dict;
    3. otherwise it is kept verbatim as ``{"raw_content": <text>}`` and a
       warning is printed.

    NOTE(review): step 2 exists because LangChain's ``AirtableLoader`` appears
    to store ``str(record)`` (a single-quoted Python dict repr) rather than
    JSON, which ``json.loads`` always rejects -- confirm against the installed
    LangChain version.

    Returns:
        One entry per document returned by the loader.

    Raises:
        KeyError: If ``AIRTABLE_API_KEY`` is not set in the environment.
    """
    import ast  # local import: only needed for the Python-literal fallback

    loader = AirtableLoader(os.environ["AIRTABLE_API_KEY"], table_id, base_id, view=view)
    data = []
    for doc in loader.load():
        content = doc.page_content

        # First choice: the content is real JSON.
        try:
            data.append(json.loads(content))
            continue
        except json.JSONDecodeError:
            pass

        # Second choice: the content is a Python dict repr. literal_eval only
        # evaluates literals, so it is safe on external data. The json.dumps
        # probe rejects values (e.g. sets, bytes) that could not be serialised
        # again as JSON later on.
        try:
            record = ast.literal_eval(content)
            json.dumps(record)
        except (ValueError, SyntaxError, TypeError):
            record = None

        if isinstance(record, dict):
            data.append(record)
        else:
            # Last resort: keep the raw text so the record is not lost.
            print(f"Warning: Could not parse JSON for document: {content[:100]}...")
            data.append({"raw_content": content})
    return data

# Fetch the Airtable records once at start-up; on any failure report it and
# continue with an empty record list
try:
    airtable_data = load_airtable_data()
    print(f"Successfully loaded {len(airtable_data)} records from Airtable.")
except Exception as load_error:
    print(f"Error loading Airtable data: {str(load_error)}")
    airtable_data = []

# Prepare documents for embedding: each record becomes one JSON-serialised
# Document, then the documents are passed through the text splitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = [Document(page_content=json.dumps(record)) for record in airtable_data]
split_documents = text_splitter.split_documents(documents)

# An empty corpus cannot be indexed (FAISS sizes the index from the first
# embedding vector and fails with an opaque IndexError when there is none),
# so stop here with an actionable message instead
if not split_documents:
    raise RuntimeError(
        "No Airtable records are available to index. Check the "
        "AIRTABLE_API_KEY, base_id, table_id and view environment variables "
        "and any Airtable loading error printed earlier."
    )

# Initialize the embedding model and FAISS index
embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(split_documents, embedding_model)

# Define the retrieval model
retriever = vectorstore.as_retriever()

# Define the chat model
chat_model = ChatOpenAI(model="gpt-4o")

# System prompt describing the assistant's role, the layout of the assignment
# database and the "at most 2 in-class assessments per day" policy.
# ask_question() prepends this text to every user question.
system_message_content = """
You are a school assistant with strong database Q&A capabilities. 
Your role is to help educators keep track of students' assignments in different classes.
This is a complex problem, because each student has their own menu of classes (they choose their classes), so that it can be hard for a teacher to know what assignments their students might have
in other classes. Solving this requires carefully analyzing a database.
You have acces to a database with the following format:
-List of classes
-List of DUE dates, when students turn in work done at home
-List of DO dates, when students take assessments in class
-List of DUE assignments
-List of DO assessments
The policy is that students cannot have to DO more than 2 in-class assignments on a given day.
HOWEVER, they might have 2 or more assignments DUE on the same day.
Be concise and factual in your answers unless asked for more details. 
Base all of your answers on the data provided.
Double-check your answers, and if you don't know the answer, say that you don't know.
"""

# Retrieval-augmented QA chain: documents fetched by the retriever are
# "stuffed" into a single prompt for the chat model; the source documents are
# returned alongside the answer
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=chat_model,
    chain_type="stuff",
    return_source_documents=True,
)

def ask_question(question: str) -> str:
    """Ask a question about the Airtable data.

    Documents are retrieved using only the user's question, so the similarity
    search is not dominated by the constant system prompt. The system prompt
    is then combined with the question and handed, together with the
    retrieved documents, to the QA chain's document-combining step.

    Args:
        question: The user's question as entered in the UI.

    Returns:
        The model's answer, or a short hint when no question was entered.
    """
    # Nothing to search for or answer: skip the embedding and LLM calls.
    if not question or not question.strip():
        return "Please enter a question."

    # Retrieve with the bare question only.
    relevant_docs = retriever.get_relevant_documents(question)

    # Combine the system message and user question for the LLM
    full_query = f"{system_message_content}\n\nHuman: {question}\n\nAssistant:"

    # Run the same "stuff" step the chain would run, with the chosen documents
    return qa_chain.combine_documents_chain.run(
        input_documents=relevant_docs,
        question=full_query,
    )

# Define the Gradio interface
def gradio_interface(question: str) -> str:
    """Gradio callback: forward the textbox input to ask_question and return its answer."""
    answer = ask_question(question)
    return answer

# Build the web UI: one text input, one text output, served by gradio_interface
iface = gr.Interface(
    title="Summative Assessment Tracker",
    description="I am here to help you schedule summative assessments for your students",
    fn=gradio_interface,
    inputs="text",
    outputs="text",
)

# Start the Gradio server with debug output enabled
iface.launch(debug=True)