jeremierostan committed on
Commit
7cd0b73
·
verified ·
1 Parent(s): d0ea5ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -91
app.py CHANGED
@@ -1,130 +1,97 @@
1
  import os
2
  import json
 
3
  import gradio as gr
 
 
4
  from typing import List, Dict
5
  from langchain.document_loaders import AirtableLoader
6
- from langchain.vectorstores import FAISS
7
- from langchain.embeddings import OpenAIEmbeddings
8
- from langchain.chains import RetrievalQA
9
- from langchain.chat_models import ChatOpenAI
10
- from langchain.schema import SystemMessage, HumanMessage
11
- from langchain.text_splitter import CharacterTextSplitter
12
- from langchain.docstore.document import Document
13
 
14
  # Set up API keys
15
# Credentials and Airtable coordinates are read from the environment.
# Each value is None when the corresponding variable is unset.
AIRTABLE_API_KEY = os.environ.get("AIRTABLE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Airtable base / table / view used by load_airtable_data().
base_id = os.environ.get("base_id")
table_id = os.environ.get("table_id")
view = os.environ.get("view")
21
-
22
def load_airtable_data() -> List[Dict]:
    """Load the configured Airtable view and return one entry per document.

    Each document's ``page_content`` is decoded as JSON. Content that is not
    valid JSON is kept as ``{"raw_content": <text>}`` and a warning is
    printed, so no document is dropped.

    Raises:
        KeyError: if ``AIRTABLE_API_KEY`` is missing from the environment
            (it is read with ``os.environ[...]`` here).
    """

    def _decode(raw):
        # Fall back to the raw text instead of failing the whole load.
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            print(f"Warning: Could not parse JSON for document: {raw[:100]}...")
            return {"raw_content": raw}

    airtable = AirtableLoader(
        os.environ["AIRTABLE_API_KEY"], table_id, base_id, view=view
    )
    return [_decode(entry.page_content) for entry in airtable.load()]
37
 
38
# Load Airtable data once at import time. On any failure the error is printed
# and an empty list is used so the statements below still run.
try:
    airtable_data = load_airtable_data()
    print(f"Successfully loaded {len(airtable_data)} records from Airtable.")
except Exception as e:
    print(f"Error loading Airtable data: {str(e)}")
    airtable_data = []

# Prepare documents for embedding: each record is serialized back to JSON
# text and split into chunks of at most 1000 characters with no overlap.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = [Document(page_content=json.dumps(record)) for record in airtable_data]
split_documents = text_splitter.split_documents(documents)

# Initialize the embedding model and FAISS index.
# NOTE(review): when airtable_data is empty, split_documents is empty too;
# confirm how FAISS.from_documents behaves on an empty list.
embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(split_documents, embedding_model)

# Define the retrieval model
retriever = vectorstore.as_retriever()

# Define the chat model
chat_model = ChatOpenAI(model="gpt-4o")

# Custom prompt prepended to every question by ask_question().
# The policy sentence previously read "students cannot have 0, 1, or 2 ... but
# not 3 or more", which contradicted itself and the hidden procedure below
# (which checks for students who already have 2 or more DO assessments).
system_message_content = """
#Context
You are a school assistant with strong database Q&A capabilities.
Your role is to help educators keep track of students' assignments in different classes.
This is a complex problem, because each student has their own menu of classes (they choose their classes), so that it can be hard for a teacher to know what assignments their students might have
in other classes. Solving this requires carefully analyzing a database.
You have acces to a list of records with the following format:
-Class
-List of students enrolled in the class (student codes)
-List of DUE dates, when students turn in work done at home
-List of DO dates, when students take assessments in class
-List of DUE assignments
-List of DO assessments
The policy is that students can have 0, 1, or 2 DO assessments the same day, but not 3 or more.
HOWEVER, DUE assignments do not count towards this total.

#Instructions
When asked a question about a class and a date, follow this thought process internally without sharing it with the user. Only share the conclusion.
[Thought process (hidden internal state):
-Store a dictionary of all students enrolled in this class
-Look up all other classes one by one, and check if it has a DO date the same day, and if some of the same students are enrolled. Make sure to double-check the student codes and rosters
-Deduce whether any student in the first class already has 2 or more "DO" assessments on that day in the other classes
-Double check by running the same steps again and testing your conclusion. Pay careful attention to student codes and dates.

Keep this procedure in mind when answering other questions as well.
]

This is a complex task. Take your time and be thorough and this is critically important to the school.
Be concise and factual in your answers unless asked for more details.
Base all of your answers on the data provided.
Start your response with a thumb up 👍 or a thumb down 👎 depending on your answer.
Double-check your answers, and if you don't know the answer, say that you don't know.
"""

# Create the QA chain: retrieved chunks are "stuffed" into a single prompt
# for chat_model; source documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
103
-
104
def ask_question(question: str) -> str:
    """Answer *question* about the Airtable data through the QA chain.

    The system prompt, the user's question (labelled ``Human:``) and a
    trailing ``Assistant:`` cue are joined with blank lines and submitted as
    the chain's ``query``. Only the chain's ``result`` text is returned.
    """
    # NOTE(review): the entire prompt, not just the question, is passed as the
    # chain's query - confirm that is what the retriever should search with.
    prompt = "\n\n".join(
        (system_message_content, f"Human: {question}", "Assistant:")
    )
    return qa_chain({"query": prompt})["result"]
114
 
115
  # Define the Gradio interface
116
def gradio_interface(question: str) -> str:
    """Gradio callback: forward the typed question to ask_question()."""
    answer = ask_question(question)
    return answer
118
 
119
  # Set up Gradio interface
120
# One free-text input; the answer is rendered as Markdown.
_interface_options = {
    "fn": gradio_interface,
    "inputs": "text",
    "outputs": gr.Markdown(),
    "title": "📅 Summative Assessment Tracker",
    "description": "I am here to help you schedule summative assessments for your students",
}
iface = gr.Interface(**_interface_options)

# Launch the Gradio app
iface.launch(debug=True)
 
1
  import os
2
  import json
3
+ import pandas as pd
4
  import gradio as gr
5
+ import openai
6
+ import time
7
  from typing import List, Dict
8
  from langchain.document_loaders import AirtableLoader
 
 
 
 
 
 
 
9
 
10
  # Set up API keys
11
# Credentials and Airtable coordinates are read from the environment.
# Each value is None when the corresponding variable is unset.
AIRTABLE_API_KEY = os.environ.get("AIRTABLE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
BASE_ID = os.environ.get("base_id")
TABLE_ID = os.environ.get("table_id")
VIEW = os.environ.get("view")

# OpenAI client: the key is set both on the module and on the client object.
openai.api_key = OPENAI_API_KEY
client = openai.Client(api_key=OPENAI_API_KEY)

# The assistant is looked up by id; one thread is created at import time and
# reused by every call to chat_with_assistant().
ASSISTANT_ID = os.environ.get("assistant_id")
assistant = client.beta.assistants.retrieve(ASSISTANT_ID)
thread = client.beta.threads.create()
25
+
26
def load_airtable_data() -> pd.DataFrame:
    """Load the configured Airtable view into a DataFrame.

    Each document's ``page_content`` is decoded as JSON and becomes one row.
    Content that is not valid JSON is kept as a row with a single
    ``raw_content`` entry and a warning is printed, so no document is dropped.
    """
    rows = []
    airtable = AirtableLoader(AIRTABLE_API_KEY, TABLE_ID, BASE_ID, view=VIEW)
    for entry in airtable.load():
        raw = entry.page_content
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            print(f"Warning: Could not parse JSON for document: {raw[:100]}...")
            parsed = {"raw_content": raw}
        rows.append(parsed)
    return pd.DataFrame(rows)
39
 
40
# Load the Airtable data once at import time. On any failure the error is
# printed and an empty DataFrame is used so the app can still start.
try:
    airtable_data_df = load_airtable_data()
    print("Successfully loaded data from Airtable.")
except Exception as exc:
    print(f"Error loading Airtable data: {exc!s}")
    airtable_data_df = pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
# Run states that will not progress to 'completed' without outside action.
# 'requires_action' is included because this module never submits tool outputs.
_UNRECOVERABLE_RUN_STATUSES = frozenset(
    {"failed", "cancelled", "expired", "incomplete", "requires_action"}
)


def _cancel_run_quietly(run_id: str) -> None:
    """Best-effort cancel of a run on the shared thread; failures are only printed."""
    try:
        client.beta.threads.runs.cancel(thread_id=thread.id, run_id=run_id)
    except Exception as exc:  # cancellation must not mask the original error
        print(f"Warning: could not cancel run {run_id}: {exc}")


def chat_with_assistant(
    message: str,
    dataframe: pd.DataFrame,
    *,
    poll_interval: float = 1.0,
    timeout: float = 300.0,
) -> str:
    """Send a question plus the data (as CSV) to the assistant and return its reply.

    The DataFrame is serialized to CSV and embedded in the user message, which
    is posted to the module-level ``thread``. A run is started for
    ``ASSISTANT_ID`` and polled until it finishes.

    Args:
        message: The user's question.
        dataframe: Data to include in the prompt; written with ``index=False``.
        poll_interval: Seconds to sleep between status checks.
        timeout: Maximum seconds to wait for the run before giving up.

    Returns:
        The text of the newest message on the thread, with multiple text
        parts joined by blank lines.

    Raises:
        RuntimeError: if the run ends in a state other than ``completed``, or
            the newest message contains no text.
        TimeoutError: if the run has not completed within ``timeout`` seconds.
    """
    dataframe_csv = dataframe.to_csv(index=False)

    full_message = (
        "\n"
        "You are an assistant with code interpreter capabilities.\n"
        "I have a DataFrame with the following content:\n"
        f"{dataframe_csv}\n"
        "\n"
        f"Here is my question: {message}\n"
        "\n"
        "Please use the DataFrame and code to provide an answer.\n"
    )

    # NOTE(review): every call appends to the same module-level thread, so
    # all users share one conversation and the CSV is re-sent each time.
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=full_message,
    )

    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=ASSISTANT_ID,
    )

    # Poll until the run completes, but never forever: previously any state
    # other than 'completed' kept this loop (and the web request) spinning.
    deadline = time.monotonic() + timeout
    while True:
        run_status = client.beta.threads.runs.retrieve(
            thread_id=thread.id, run_id=run.id
        )
        status = run_status.status
        if status == "completed":
            break
        if status in _UNRECOVERABLE_RUN_STATUSES:
            if status == "requires_action":
                # The run is still open; cancel it so it does not linger on
                # the shared thread.
                _cancel_run_quietly(run.id)
            last_error = getattr(run_status, "last_error", None)
            detail = getattr(last_error, "message", None) or "no error detail provided"
            raise RuntimeError(
                f"Assistant run ended with status '{status}': {detail}"
            )
        if time.monotonic() >= deadline:
            _cancel_run_quietly(run.id)
            raise TimeoutError(
                f"Assistant run did not complete within {timeout} seconds "
                f"(last status: '{status}')"
            )
        time.sleep(poll_interval)

    messages = client.beta.threads.messages.list(thread_id=thread.id)
    if not messages.data:
        raise RuntimeError("Assistant run completed but the thread has no messages")

    # The first listed message is taken as the newest, as the original code did.
    # It may mix text with non-text parts (e.g. images), so keep only the text.
    text_parts = [
        part.text.value
        for part in messages.data[0].content
        if getattr(part, "type", None) == "text"
    ]
    if not text_parts:
        raise RuntimeError("Assistant reply contained no text content")
    return "\n\n".join(text_parts)
 
82
 
83
  # Define the Gradio interface
84
def gradio_interface(question: str) -> str:
    """Gradio callback: answer *question* using the DataFrame loaded at import."""
    return chat_with_assistant(message=question, dataframe=airtable_data_df)
86
 
87
  # Set up Gradio interface
88
# One free-text input; the assistant's answer is rendered as Markdown.
_answer_view = gr.Markdown()
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs=_answer_view,
    title="📅 Summative Assessment Tracker",
    description="I am here to help you schedule summative assessments for your students",
)

# Launch the Gradio app
iface.launch(debug=True)