Spaces:

chriswhpang
/

csv2md

Build error

csv2md / app.py

Chris Pang

first draft

bc88deb over 1 year ago

2.73 kB

	import gradio as gr
	import pandas as pd
	from langchain.agents import create_pandas_dataframe_agent
	from langchain.chat_models import ChatOpenAI
	import re
	import json



	def _extract_tasks(planning):
	    """Return the non-empty lines of *planning* with leading non-alphanumerics removed."""
	    cleaned = (
	        re.sub(r"^[\W_]+", "", line.strip()) for line in planning.splitlines()
	    )
	    return [task for task in cleaned if task]


	def process_inputs(open_api_key, open_api_model, description, csv_file):
	    """Run an agent-driven exploratory analysis of a CSV file.

	    The CSV is loaded into a pandas DataFrame and handed to a LangChain
	    pandas-dataframe agent backed by ``ChatOpenAI``. The agent is then asked,
	    in order, to describe the data, propose analyses, carry out each proposed
	    analysis, create plots, and form hypotheses. Every answer is collected in
	    a ``history`` dict.

	    Args:
	        open_api_key: OpenAI API key passed to ``ChatOpenAI``.
	        open_api_model: Model name passed to ``ChatOpenAI``.
	        description: User-supplied context; when non-empty it is prepended to
	            the agent's prompt template.
	        csv_file: Either an object exposing the CSV path as ``.name`` or the
	            path itself.

	    Returns:
	        A JSON string (2-space indent) of the history dict with the keys
	        ``description``, ``planning``, ``result``, ``plots`` and
	        ``hypothesis``; or a string starting with ``"Error: "`` when no file
	        was given or the CSV/agent could not be set up.

	    Side effects:
	        Prints progress to stdout and appends each task result to
	        ``result.txt`` in the current working directory. The agent is also
	        *asked* to save plot files; whether it does depends on the agent.
	    """
	    if csv_file is None:
	        return "Error: no CSV file was provided."

	    # Accept both an uploaded-file object exposing ``.name`` and a plain path.
	    csv_path = getattr(csv_file, "name", csv_file)

	    # setup agent
	    try:
	        df = pd.read_csv(csv_path)
	        df_agent = create_pandas_dataframe_agent(
	            ChatOpenAI(
	                temperature=0, openai_api_key=open_api_key, model=open_api_model
	            ),
	            df,
	            verbose=True,
	        )
	    except Exception as e:
	        # Broad catch on purpose: this is the UI boundary, so report the
	        # failure to the user instead of returning None (an empty textbox).
	        print(e)
	        return f"Error: {e}"

	    if description:
	        # Blank line keeps the user context from running into the template text.
	        df_agent.agent.llm_chain.prompt.template = (
	            "Context: "
	            + description
	            + "\n\n"
	            + df_agent.agent.llm_chain.prompt.template
	        )

	    # Planning
	    history = {}
	    description = df_agent.run("Describe the data")
	    history["description"] = description

	    planning = df_agent.run(
	        f"Description: {description} Based on this description and the data itself, what kind of analysis can we perform on the data? Return result in bullet points without the bullets, just new lines"
	    )

	    print(planning)

	    # remove non-Alphanumeric characters at the beginning of each line
	    tasks = _extract_tasks(planning)
	    history["planning"] = tasks

	    # Perform tasks; each result is written as soon as it is available so that
	    # partial output survives a later failure, and the file is always closed.
	    result = []
	    with open("result.txt", "a", encoding="utf-8") as f:
	        for task in tasks:
	            print(f"The task is: {task}")
	            data = df_agent.run(task)
	            result.append(data)
	            f.write(data + "\n")
	    history["result"] = result

	    plots = df_agent.run(
	        f"Based on the information given below, create some insightful plots using matplotlib, seaborn or plotly with python_repl_ast. Save these plots in the root directory with a relevant name, and return a dict where keys are the name of the files and the values are the insights dervied from the plot. Information about the data is as follows: {history}"
	    )
	    history["plots"] = plots

	    hypothesis = df_agent.run(
	        f"Based on the information, data, and context given, form valid hypothesis for further investigations. Information about the data is as follows: {history}"
	    )
	    history["hypothesis"] = hypothesis

	    # json.dumps returns the text; json.dump would write into a file object
	    # and return None, which made the UI display the literal string "None".
	    return json.dumps(history, indent=2)


	# Gradio UI: four inputs (API key, model choice, free-text description and a
	# CSV upload) are passed positionally to process_inputs; its return value is
	# shown in a single text box.
	iface = gr.Interface(
	    fn=process_inputs,
	    inputs=[
	        gr.Textbox(label="OpenAI API Key", lines=1),
	        gr.Dropdown(
	            choices=["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4"],
	            label="Model",
	        ),
	        gr.Textbox(label="Brief description of the data", lines=2),
	        gr.File(label="Upload CSV Only"),
	    ],
	    outputs=gr.Textbox(),
	)

	# Start the web server as soon as the module is executed.
	iface.launch()