|
import os |
|
import json |
|
import gradio as gr |
|
from typing import List, Dict |
|
from langchain.document_loaders import AirtableLoader |
|
from langchain.vectorstores import FAISS |
|
from langchain.embeddings import OpenAIEmbeddings |
|
from langchain.chains import RetrievalQA |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain.schema import SystemMessage, HumanMessage |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.docstore.document import Document |
|
|
|
|
|
# API credentials, read from the environment; each is None when unset.
AIRTABLE_API_KEY = os.environ.get("AIRTABLE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Airtable base/table/view selectors. Note that these environment variable
# names are lowercase, unlike the credential names above.
base_id = os.environ.get("base_id")
table_id = os.environ.get("table_id")
view = os.environ.get("view")
|
|
|
def load_airtable_data() -> List[Dict]:
    """Load the configured Airtable view and return its records as dicts.

    The text of every document produced by ``AirtableLoader`` is decoded
    into a dictionary. JSON is tried first. When that fails, the text is
    parsed as a Python literal, because the loader is expected to store the
    record's Python repr (``str(record)``, single-quoted keys) rather than
    JSON -- see the LangChain ``AirtableLoader`` reference.

    Returns:
        One entry per loaded document, in loader order. A document that
        cannot be decoded either way is kept as
        ``{"raw_content": <original text>}`` and a warning is printed.

    Raises:
        KeyError: If ``AIRTABLE_API_KEY`` is not set in the environment.
    """
    # Function-scope import: only this fallback needs it.
    import ast

    # os.environ[...] (not .get) so a missing key fails immediately here.
    loader = AirtableLoader(
        os.environ["AIRTABLE_API_KEY"], table_id, base_id, view=view
    )

    data = []
    for doc in loader.load():
        content = doc.page_content
        try:
            record = json.loads(content)
        except json.JSONDecodeError:
            # Not JSON: try the Python-literal form. literal_eval only
            # evaluates literals, so it is safe on arbitrary text.
            try:
                record = ast.literal_eval(content)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                record = None
            if not isinstance(record, dict):
                print(f"Warning: Could not parse JSON for document: {content[:100]}...")
                record = {"raw_content": content}
        data.append(record)
    return data
|
|
|
|
|
# Load the records once at import time. Any failure is reported on stdout
# and replaced by an empty list instead of being propagated.
try:
    airtable_data = load_airtable_data()
except Exception as e:
    print(f"Error loading Airtable data: {str(e)}")
    airtable_data = []
else:
    print(f"Successfully loaded {len(airtable_data)} records from Airtable.")
|
|
|
|
|
# Serialize every record back to JSON text so it can be embedded and
# retrieved as a document.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = [Document(page_content=json.dumps(record)) for record in airtable_data]
split_documents = text_splitter.split_documents(documents)

# Stop with an actionable message when there is nothing to index (for
# example after the Airtable load failed and fell back to an empty list).
# Building a FAISS index from an empty document list is expected to fail
# with an unhelpful IndexError, so check before calling the embedding API.
if not split_documents:
    raise RuntimeError(
        "No Airtable records were loaded, so the search index cannot be built. "
        "Check the AIRTABLE_API_KEY, base_id, table_id and view settings."
    )

# Embed the documents and index them for similarity search.
embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(split_documents, embedding_model)

retriever = vectorstore.as_retriever()

# Chat model that writes the final answer.
chat_model = ChatOpenAI(model="gpt-4o")
|
|
|
|
|
# Instruction text describing the assistant's role, the database layout and
# the scheduling policy. The wording is reproduced verbatim (including the
# "acces" spelling) because it is sent to the model at runtime.
system_message_content = "\n".join(
    [
        "",
        "You are a school assistant with strong database Q&A capabilities.",
        "Your role is to help educators keep track of students' assignments in different classes.",
        "This is a complex problem, because each student has their own menu of classes (they choose their classes), so that it can be hard for a teacher to know what assignments their students might have",
        "in other classes. Solving this requires carefully analyzing a database.",
        "You have acces to a database with the following format:",
        "-List of classes",
        "-List of DUE dates, when students turn in work done at home",
        "-List of DO dates, when students take assessments in class",
        "-List of DUE assignments",
        "-List of DO assessments",
        "The policy is that students cannot have to DO more than 2 in-class assignments on a given day.",
        "HOWEVER, they might have 2 or more assignments DUE on the same day.",
        "Be concise and factual in your answers unless asked for more details.",
        "Base all of your answers on the data provided.",
        "Double-check your answers, and if you don't know the answer, say that you don't know.",
        "",
    ]
)
|
|
|
|
|
# Retrieval QA chain: the "stuff" chain type places all retrieved documents
# into a single prompt for the chat model. Source documents are returned
# alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=chat_model,
    return_source_documents=True,
    chain_type="stuff",
)
|
|
|
def ask_question(question: str) -> str:
    """Answer a question about the Airtable data.

    Args:
        question: The user's question in plain text.

    Returns:
        The model's answer, or a short request to enter a question when
        ``question`` is empty or only whitespace.
    """
    if not question or not question.strip():
        # Nothing to search for; skip the embedding and model calls.
        return "Please enter a question."

    # Retrieve with the bare question only. If the composed prompt below
    # were used as the chain's "query", the fixed system instructions would
    # be embedded too and would dominate the similarity search.
    relevant_documents = retriever.get_relevant_documents(question)

    # The model still receives the same composed prompt as before.
    full_query = f"{system_message_content}\n\nHuman: {question}\n\nAssistant:"

    # Run the chain's answer step on the documents retrieved above.
    return qa_chain.combine_documents_chain.run(
        input_documents=relevant_documents, question=full_query
    )
|
|
|
|
|
def gradio_interface(question: str) -> str:
    """Gradio callback: pass the submitted text to ``ask_question``."""
    answer = ask_question(question)
    return answer
|
|
|
|
|
# One text box in, one text box out, wired to the QA callback.
iface = gr.Interface(
    title="Summative Assessment Tracker",
    description="I am here to help you schedule summative assessments for your students",
    fn=gradio_interface,
    inputs="text",
    outputs="text",
)

# Start the web UI as soon as the module runs, with debug output enabled.
iface.launch(debug=True)