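# Gradio app: a retrieval-augmented assistant that answers questions about student
# assignments stored in an Airtable base, using LangChain with OpenAI embeddings,
# a FAISS vector store, and GPT-4o.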
import os
import json
import gradio as gr
from typing import List, Dict
from langchain.document_loaders import AirtableLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document

# Set up API keys
AIRTABLE_API_KEY = os.getenv("AIRTABLE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
base_id = os.getenv("base_id")
table_id = os.getenv("table_id")
view = os.getenv("view")
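# These values are expected as environment variables (e.g. Hugging Face Space secrets).
# OPENAI_API_KEY is not passed explicitly below; the LangChain OpenAI classes pick it
# up from the environment.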


def load_airtable_data() -> List[Dict]:
    """Load data from Airtable and return as a list of dictionaries."""
    loader = AirtableLoader(os.environ["AIRTABLE_API_KEY"], table_id, base_id, view=view)
    documents = loader.load()
    data = []
    for doc in documents:
        try:
            # Try to parse the JSON content
            record = json.loads(doc.page_content)
            data.append(record)
        except json.JSONDecodeError:
            # If JSON parsing fails, use the raw content
            print(f"Warning: Could not parse JSON for document: {doc.page_content[:100]}...")
            data.append({"raw_content": doc.page_content})
    return data

# Load Airtable data
try:
    airtable_data = load_airtable_data()
    print(f"Successfully loaded {len(airtable_data)} records from Airtable.")
except Exception as e:
    print(f"Error loading Airtable data: {str(e)}")
    airtable_data = []

# Prepare documents for embedding
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = [Document(page_content=json.dumps(record)) for record in airtable_data]
split_documents = text_splitter.split_documents(documents)

# Initialize the embedding model and FAISS index
embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(split_documents, embedding_model)
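# The index is built once at startup from the records loaded above. Note that if the
# Airtable load failed and airtable_data is empty, FAISS.from_documents will likely
# raise here, since there is nothing to embed.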

# Define the retrieval model
retriever = vectorstore.as_retriever()

# Define the chat model
chat_model = ChatOpenAI(model="gpt-4o")

# Define a custom prompt for context
system_message_content = """
You are a school assistant with strong database Q&A capabilities.
Your role is to help educators keep track of students' assignments in different classes.
This is a complex problem because each student chooses their own menu of classes, so it can be hard for a teacher to know what assignments their students might have in other classes.
Solving this requires carefully analyzing a database.
You have access to a database with the following format:
- List of classes
- List of DUE dates, when students turn in work done at home
- List of DO dates, when students take assessments in class
- List of DUE assignments
- List of DO assessments
The policy is that students cannot have to DO more than 2 in-class assignments on a given day.
HOWEVER, they might have 2 or more assignments DUE on the same day.
Be concise and factual in your answers unless asked for more details.
Base all of your answers on the data provided.
Double-check your answers, and if you don't know the answer, say that you don't know.
"""

# Create the QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)


def ask_question(question: str) -> str:
    """Ask a question about the Airtable data."""
    # Combine the system message and user question
    full_query = f"{system_message_content}\n\nHuman: {question}\n\nAssistant:"
    # Get the response from the QA chain
    response = qa_chain({"query": full_query})
    # Return the response content
    return response['result']


# Define the Gradio interface
def gradio_interface(question: str) -> str:
    return ask_question(question)

# Set up Gradio interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Summative Assessment Tracker",
    description="I am here to help you schedule summative assessments for your students"
)

# Launch the Gradio app
iface.launch(debug=True)
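

# A minimal usage sketch (assumptions: the app is running locally on the default
# port, and the endpoint keeps Gradio's default api_name "/predict"):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   answer = client.predict("Which assessments are scheduled next Monday?", api_name="/predict")
#   print(answer)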