# csv2md / app.py
# Chris Pang - "first draft" (commit bc88deb)
# File size: 2.73 kB
# (file-viewer links: raw, history, blame)
import gradio as gr
import pandas as pd
from langchain.agents import create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI
import re
import json
def process_inputs(open_api_key, open_api_model, description, csv_file):
    """Run an LLM-driven exploratory analysis of an uploaded CSV file.

    A pandas dataframe agent is built over the CSV and asked, in order, to
    describe the data, propose analyses, carry out each proposed analysis,
    create plots, and form hypotheses. Each task result is also appended to
    ``result.txt`` in the current working directory.

    Args:
        open_api_key: OpenAI API key passed to ``ChatOpenAI``.
        open_api_model: Name of the chat model to use.
        description: User-supplied context that is prepended to the agent's
            prompt template.
        csv_file: The uploaded CSV, either an object whose ``name`` attribute
            is the file path, or the path itself.

    Returns:
        The collected results (keys ``description``, ``planning``,
        ``result``, ``plots`` and ``hypothesis``) as an indented JSON string,
        or a string starting with ``"Error: "`` when no file was given, the
        CSV could not be read, or the agent could not be created.
    """
    if csv_file is None:
        return "Error: no CSV file was provided."

    # Set up the agent.
    try:
        # Accept both an upload wrapper exposing ``.name`` and a plain path.
        csv_path = getattr(csv_file, "name", csv_file)
        df = pd.read_csv(csv_path)
        df_agent = create_pandas_dataframe_agent(
            ChatOpenAI(
                temperature=0, openai_api_key=open_api_key, model=open_api_model
            ),
            df,
            verbose=True,
        )
    except Exception as e:
        # UI boundary: log the failure and show it to the user instead of
        # returning None (which rendered as an empty output box).
        print(e)
        return f"Error: {e}"

    # Prepend the user's context to the agent's existing prompt template.
    df_agent.agent.llm_chain.prompt.template = (
        "Context: " + (description or "") + df_agent.agent.llm_chain.prompt.template
    )

    # Planning
    history = {}
    data_description = df_agent.run("Describe the data")
    history["description"] = data_description
    planning = df_agent.run(
        f"Description: {data_description} Based on this description and the data itself, what kind of analysis can we perform on the data? Return result in bullet points without the bullets, just new lines"
    )
    print(planning)

    # One task per non-empty line, with any leading non-alphanumeric
    # characters (bullets, dashes, underscores, ...) removed.
    cleaned_lines = (
        re.sub(r"^[\W_]+", "", line.strip()) for line in planning.splitlines()
    )
    tasks = [task for task in cleaned_lines if task]
    history["planning"] = tasks

    # Perform tasks; the context manager closes the log even if a task fails.
    result = []
    with open("result.txt", "a", encoding="utf-8") as log_file:
        for task in tasks:
            print(f"The task is: {task}")
            data = df_agent.run(task)
            result.append(data)
            log_file.write(data + "\n")
    history["result"] = result

    plots = df_agent.run(
        f"Based on the information given below, create some insightful plots using matplotlib, seaborn or plotly with python_repl_ast. Save these plots in the root directory with a relevant name, and return a dict where keys are the name of the files and the values are the insights dervied from the plot. Information about the data is as follows: {history}"
    )
    history["plots"] = plots

    hypothesis = df_agent.run(
        f"Based on the information, data, and context given, form valid hypothesis for further investigations. Information about the data is as follows: {history}"
    )
    history["hypothesis"] = hypothesis

    # json.dumps returns the serialized text; json.dump would write to a file
    # object and return None.
    return json.dumps(history, indent=2)
# Input widgets, listed in the positional order of process_inputs' parameters:
# API key, model name, data description, uploaded CSV.
input_components = [
    gr.Textbox(label="OpenAI API Key", lines=1),
    gr.Dropdown(
        choices=["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4"],
        label="Model",
    ),
    gr.Textbox(label="Brief description of the data", lines=2),
    gr.File(label="Upload CSV Only"),
]

# The function's return value is shown in a single text box.
iface = gr.Interface(
    fn=process_inputs,
    inputs=input_components,
    outputs=gr.Textbox(),
)

# Start the web UI when this file is executed.
iface.launch()