"""Gradio question-answering app over an Airtable base.

Loads records from Airtable, embeds them into a FAISS vector store, and
exposes a RetrievalQA chain (GPT-4o) through a simple Gradio text interface
so teachers can query students' assignment schedules.
"""

import os
import json
from typing import List, Dict

import gradio as gr
from langchain.document_loaders import AirtableLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document


def _require_env(name: str) -> str:
    """Return environment variable *name*, raising a clear error if unset.

    BUG FIX: the original used ``os.getenv["KEY"]`` -- ``os.getenv`` is a
    function, not a mapping, so subscripting it raises ``TypeError`` before
    the app can even start. It must be called: ``os.getenv("KEY")``.
    """
    value = os.getenv(name)
    if value is None:
        raise EnvironmentError(f"Required environment variable {name!r} is not set.")
    return value


# Set up API keys and Airtable coordinates from the environment.
os.environ["AIRTABLE_API_KEY"] = _require_env("AIRTABLE_API_KEY")
os.environ["OPENAI_API_KEY"] = _require_env("OPENAI_API_KEY")

base_id = _require_env("base_id")
table_id = _require_env("table_id")
view = _require_env("view")


def load_airtable_data() -> List[Dict]:
    """Load records from Airtable and return them as a list of dictionaries.

    Each loaded document's ``page_content`` is expected to be a JSON string;
    records that fail to parse are kept under a ``"raw_content"`` key rather
    than being dropped, so no data is silently lost.
    """
    loader = AirtableLoader(
        os.environ["AIRTABLE_API_KEY"], table_id, base_id, view=view
    )
    documents = loader.load()

    data = []
    for doc in documents:
        try:
            # Try to parse the JSON content.
            record = json.loads(doc.page_content)
            data.append(record)
        except json.JSONDecodeError:
            # If JSON parsing fails, fall back to the raw content.
            print(
                f"Warning: Could not parse JSON for document: "
                f"{doc.page_content[:100]}..."
            )
            data.append({"raw_content": doc.page_content})

    return data


# Load Airtable data; fall back to an empty dataset so the UI still starts.
try:
    airtable_data = load_airtable_data()
    print(f"Successfully loaded {len(airtable_data)} records from Airtable.")
except Exception as e:
    print(f"Error loading Airtable data: {str(e)}")
    airtable_data = []

# Prepare documents for embedding: serialize each record back to JSON and
# chunk it so individual pieces fit the embedding model's context.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = [Document(page_content=json.dumps(record)) for record in airtable_data]
split_documents = text_splitter.split_documents(documents)

# Initialize the embedding model and FAISS index.
embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(split_documents, embedding_model)

# Define the retrieval model.
retriever = vectorstore.as_retriever()

# Define the chat model.
chat_model = ChatOpenAI(model="gpt-4o")

# Custom system prompt describing the assistant's role and the data schema.
# (Fixed typo: "acces" -> "access".)
system_message_content = """
You are a school assistant with strong database Q&A capabilities.
Your role is to help educators keep track of students' assignments in different classes.
This is a complex problem, because each student has their own menu of classes (they choose their classes), so that it can be hard for a teacher to know what assignments their students might have in other classes. Solving this requires carefully analyzing a database.
You have access to a database with the following format:
-List of classes
-List of DUE dates, when students turn in work done at home
-List of DO dates, when students take assessments in class
-List of DUE assignments
-List of DO assessments
The policy is that students cannot have to DO more than 2 in-class assignments on a given day. HOWEVER, they might have 2 or more assignments DUE on the same day.
Be concise and factual in your answers unless asked for more details.
Base all of your answers on the data provided. Double-check your answers, and if you don't know the answer, say that you don't know.
"""

# Create the QA chain.
qa_chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)


def ask_question(question: str) -> str:
    """Ask a question about the Airtable data and return the model's answer."""
    # Combine the system message and user question into a single query,
    # since RetrievalQA's "stuff" chain takes one prompt string.
    full_query = f"{system_message_content}\n\nHuman: {question}\n\nAssistant:"

    # Get the response from the QA chain.
    response = qa_chain({"query": full_query})

    # Return the response content.
    return response["result"]


def gradio_interface(question: str) -> str:
    """Gradio callback: delegate straight to :func:`ask_question`."""
    return ask_question(question)


# Set up the Gradio interface.
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Summative Assessment Tracker",
    description="I am here to help you schedule summative assessments for your students",
)

# Launch the Gradio app only when executed as a script, so importing this
# module (e.g. for testing) does not start a web server.
if __name__ == "__main__":
    iface.launch(debug=True)