Spaces:
Sleeping
Sleeping
Mustehson
committed on
Commit
·
7c2e7ac
1
Parent(s):
3905cae
Summary Added
Browse files
app.py
CHANGED
@@ -69,18 +69,40 @@ def get_data_df(schema):
|
|
69 |
print('Getting Dataframe from the Database')
|
70 |
return conn.sql(f"SELECT * FROM {schema} LIMIT 1000").df()
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
def format_prompt(df):
|
74 |
-
|
75 |
-
"max": df.max(),
|
76 |
-
"min": df.min(),
|
77 |
-
"top": df.mode().iloc[0],
|
78 |
-
"nunique": df.nunique(),
|
79 |
-
"count": df.count(),
|
80 |
-
"dtype": df.dtypes.astype(str)
|
81 |
-
}).reset_index().rename(columns={"index": "column"})
|
82 |
return prompt_autogenerate.format_prompt(data=df.head().to_json(orient='records'),
|
83 |
-
summary=
|
84 |
def format_user_prompt(df):
|
85 |
return prompt_user_input.format_prompt(data=df.head().to_json(orient='records'))
|
86 |
|
|
|
69 |
print('Getting Dataframe from the Database')
|
70 |
return conn.sql(f"SELECT * FROM {schema} LIMIT 1000").df()
|
71 |
|
72 |
+
def df_summary(df):
    """Summarise every column of ``df`` as one row of statistics.

    Numeric columns report ``max`` and ``min`` and leave ``top`` empty. Every
    other column reports its most frequent value in ``top`` and leaves
    ``max``/``min`` empty. ``count``, ``nunique`` and ``dtype`` (as a string)
    are reported for all columns.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        A DataFrame with the columns ``column``, ``max``, ``min``, ``count``,
        ``nunique``, ``dtype`` and ``top``, holding one row per column of
        ``df`` in the same order. A ``df`` without columns yields an empty
        frame that still carries those seven columns.
    """
    fields = ["column", "max", "min", "count", "nunique", "dtype", "top"]
    rows = []

    for column in df.columns:
        series = df[column]
        row = {
            "column": column,
            "max": None,
            "min": None,
            "count": series.count(),
            "nunique": series.nunique(),
            "dtype": str(series.dtype),
            "top": None,
        }

        if pd.api.types.is_numeric_dtype(series):
            row["max"] = series.max()
            row["min"] = series.min()
        else:
            # Treat every non-numeric dtype here (object, categorical,
            # datetime, string, ...) so that no column is silently left out
            # of the summary, and so the deprecated is_categorical_dtype
            # check is not needed.
            modes = series.mode()
            # mode() is empty when the column holds no non-null values.
            if not modes.empty:
                row["top"] = modes.iloc[0]

        rows.append(row)

    # Passing the field list keeps the schema stable even when rows is empty.
    return pd.DataFrame(rows, columns=fields)
|
101 |
|
102 |
def format_prompt(df):
    """Build the auto-generate prompt from a data sample and a column summary.

    The per-column summary from ``df_summary`` and the first rows of ``df``
    (``df.head()``) are each serialised as JSON records and passed to
    ``prompt_autogenerate.format_prompt``; its result is returned unchanged.
    """
    column_stats = df_summary(df)
    sample_json = df.head().to_json(orient='records')
    stats_json = column_stats.to_json(orient='records')
    return prompt_autogenerate.format_prompt(data=sample_json, summary=stats_json)
|
106 |
def format_user_prompt(df):
    """Build the user-input prompt from the first rows of ``df``.

    ``df.head()`` is serialised as JSON records and passed as ``data`` to
    ``prompt_user_input.format_prompt``; its result is returned unchanged.
    """
    sample_json = df.head().to_json(orient='records')
    return prompt_user_input.format_prompt(data=sample_json)
|
108 |
|