File size: 2,725 Bytes
f17bdaf
ccf0104
bc88deb
 
 
 
f17bdaf
 
bc88deb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ccf0104
 
 
 
 
bc88deb
 
 
 
ccf0104
bc88deb
ccf0104
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
import pandas as pd
from langchain.agents import create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI
import re
import json



def _split_tasks(planning):
    """Split the agent's newline-separated plan into a list of task strings."""
    # Drop any leading run of non-alphanumeric characters (bullets, dashes,
    # underscores, ...) from each line, then discard lines that end up empty.
    cleaned = (
        re.sub(r"^[\W_]+", "", line.strip()) for line in planning.splitlines()
    )
    return [task for task in cleaned if task]


def process_inputs(open_api_key, open_api_model, description, csv_file):
    """Run an agent-driven exploratory analysis of an uploaded CSV file.

    The function loads the CSV into a DataFrame, builds a pandas DataFrame
    agent on top of a ChatOpenAI model, and then asks the agent, in order, to
    describe the data, propose analyses, execute each proposed analysis,
    create plots, and formulate hypotheses.

    Args:
        open_api_key: OpenAI API key forwarded to ``ChatOpenAI``.
        open_api_model: Model name forwarded to ``ChatOpenAI``.
        description: Free-text context about the data; it is prepended to the
            agent's prompt template.
        csv_file: The uploaded file. Either an object exposing the file path
            as ``.name`` or a plain path string is accepted.

    Returns:
        A JSON-formatted string with the keys ``description``, ``planning``,
        ``result``, ``plots`` and ``hypothesis``. If the file is missing or
        the agent cannot be set up, a string starting with ``"Error:"`` is
        returned instead.

    Side effects:
        Appends every task result to ``result.txt`` in the working directory
        and prints progress information to stdout.
    """
    if csv_file is None:
        return "Error: no CSV file was uploaded."

    # NOTE(review): depending on the Gradio version the upload may arrive as a
    # tempfile-like object (path in ``.name``) or as a path string; accept both.
    csv_path = getattr(csv_file, "name", csv_file)

    # setup agent
    try:
        df = pd.read_csv(csv_path)
        df_agent = create_pandas_dataframe_agent(
            ChatOpenAI(
                temperature=0, openai_api_key=open_api_key, model=open_api_model
            ),
            df,
            verbose=True,
        )
    except Exception as e:
        # This is the UI boundary: report the problem to the user instead of
        # returning None (which would render as an empty output box).
        print(e)
        return f"Error: {e}"

    # Prepend the user's context on its own line so it does not run into the
    # first word of the stock template.
    prompt = df_agent.agent.llm_chain.prompt
    prompt.template = "Context: " + (description or "") + "\n" + prompt.template

    # Planning
    history = {}
    data_description = df_agent.run("Describe the data")
    history["description"] = data_description

    planning = df_agent.run(
        f"Description: {data_description} Based on this description and the data itself, what kind of analysis can we perform on the data? Return result in bullet points without the bullets, just new lines"
    )

    print(planning)

    tasks = _split_tasks(planning)
    history["planning"] = tasks

    # Perform tasks; the context manager guarantees the log file is closed
    # even if one of the agent calls raises.
    result = []
    with open("result.txt", "a", encoding="utf-8") as log_file:
        for task in tasks:
            print(f"The task is: {task}")
            data = df_agent.run(task)
            result.append(data)
            log_file.write(f"{data}\n")
    history["result"] = result

    plots = df_agent.run(
        f"Based on the information given below, create some insightful plots using matplotlib, seaborn or plotly with python_repl_ast. Save these plots in the root directory with a relevant name, and return a dict where keys are the name of the files and the values are the insights dervied from the plot. Information about the data is as follows: {history}"
    )
    history["plots"] = plots

    hypothesis = df_agent.run(
        f"Based on the information, data, and context given, form valid hypothesis for further investigations. Information about the data is as follows: {history}"
    )
    history["hypothesis"] = hypothesis

    # json.dumps returns the serialized text; json.dump would write into a
    # file object and return None.
    return json.dumps(history, indent=2)


# Gradio UI. The input widgets are listed in the same order as the positional
# parameters of process_inputs (API key, model, description, CSV file); the
# function's return value is shown in a single text box.
iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        # Mask the API key so the secret is not displayed in clear text.
        gr.Textbox(lines=1, type="password", label="OpenAI API Key"),
        gr.Dropdown(["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4"], label="Model"),
        gr.Textbox(lines=2, label="Brief description of the data"),
        # Restrict the file picker to CSV files, matching the label.
        gr.File(label="Upload CSV Only", file_types=[".csv"]),
    ],
    outputs=gr.Textbox(),
)

# Start the web server for the interface.
iface.launch()