DrishtiSharma committed on
Commit
2648437
·
verified ·
1 Parent(s): b1ce5ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -5
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import os
2
  import streamlit as st
3
  import pandas as pd
4
  from datasets import load_dataset
@@ -19,7 +18,7 @@ def load_huggingface_dataset(dataset_name):
19
  try:
20
  # Incrementally update progress
21
  progress_bar.progress(10)
22
- dataset = load_dataset(dataset_name, name="sample", split="train", trust_remote_code=True, uniform_split=True)
23
  progress_bar.progress(50)
24
  if hasattr(dataset, "to_pandas"):
25
  df = dataset.to_pandas()
@@ -110,7 +109,7 @@ if "df" in st.session_state:
110
  df.to_csv("patent_data.csv", index=False)
111
 
112
  csv_agent = create_csv_agent(
113
- ChatOpenAI(temperature=0, model="gpt-4", api_key=os.getenv("OPENAI_API_KEY")),
114
  path=["patent_data.csv"],
115
  verbose=True,
116
  agent_type=AgentType.OPENAI_FUNCTIONS,
@@ -123,9 +122,27 @@ if "df" in st.session_state:
123
  if st.button("Run Query"):
124
  with st.spinner("Running query..."):
125
  try:
126
- result = csv_agent.invoke(query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  st.success("Query executed successfully!")
128
  st.write("### Query Result:")
129
- st.write(result)
 
130
  except Exception as e:
131
  st.error(f"Error executing query: {e}")
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  from datasets import load_dataset
 
18
  try:
19
  # Incrementally update progress
20
  progress_bar.progress(10)
21
+ dataset = load_dataset(dataset_name, name="sample", split="train", trust_remote_code=True)
22
  progress_bar.progress(50)
23
  if hasattr(dataset, "to_pandas"):
24
  df = dataset.to_pandas()
 
109
  df.to_csv("patent_data.csv", index=False)
110
 
111
  csv_agent = create_csv_agent(
112
+ ChatOpenAI(temperature=0, model="gpt-4", api_key=st.secrets["api_key"]),
113
  path=["patent_data.csv"],
114
  verbose=True,
115
  agent_type=AgentType.OPENAI_FUNCTIONS,
 
122
  if st.button("Run Query"):
123
  with st.spinner("Running query..."):
124
  try:
125
+ # Split query execution into smaller chunks if needed
126
+ max_rows = 1000
127
+ total_rows = len(df)
128
+ results = []
129
+
130
+ for start in range(0, total_rows, max_rows):
131
+ chunk = df.iloc[start:start + max_rows]
132
+ chunk.to_csv("chunk_data.csv", index=False)
133
+ partial_agent = create_csv_agent(
134
+ ChatOpenAI(temperature=0, model="gpt-4", api_key=st.secrets["api_key"]),
135
+ path=["chunk_data.csv"],
136
+ verbose=True,
137
+ agent_type=AgentType.OPENAI_FUNCTIONS,
138
+ allow_dangerous_code=True
139
+ )
140
+ result = partial_agent.invoke(query)
141
+ results.append(result)
142
+
143
  st.success("Query executed successfully!")
144
  st.write("### Query Result:")
145
+ st.write("\n".join(results))
146
+
147
  except Exception as e:
148
  st.error(f"Error executing query: {e}")