Mustehson committed on
Commit
7c2e7ac
·
1 Parent(s): 3905cae

Summary Added

Browse files
Files changed (1) hide show
  1. app.py +31 -9
app.py CHANGED
@@ -69,18 +69,40 @@ def get_data_df(schema):
69
  print('Getting Dataframe from the Database')
70
  return conn.sql(f"SELECT * FROM {schema} LIMIT 1000").df()
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  def format_prompt(df):
74
- summary_df = pd.DataFrame({
75
- "max": df.max(),
76
- "min": df.min(),
77
- "top": df.mode().iloc[0],
78
- "nunique": df.nunique(),
79
- "count": df.count(),
80
- "dtype": df.dtypes.astype(str)
81
- }).reset_index().rename(columns={"index": "column"})
82
  return prompt_autogenerate.format_prompt(data=df.head().to_json(orient='records'),
83
- summary=summary_df.to_json(orient='records'))
84
  def format_user_prompt(df):
85
  return prompt_user_input.format_prompt(data=df.head().to_json(orient='records'))
86
 
 
69
  print('Getting Dataframe from the Database')
70
  return conn.sql(f"SELECT * FROM {schema} LIMIT 1000").df()
71
 
72
+ def df_summary(df):
73
+ summary = []
74
+
75
+ for column in df.columns:
76
+ if pd.api.types.is_numeric_dtype(df[column]):
77
+ summary.append({
78
+ "column": column,
79
+ "max": df[column].max(),
80
+ "min": df[column].min(),
81
+ "count": df[column].count(),
82
+ "nunique": df[column].nunique(),
83
+ "dtype": str(df[column].dtype),
84
+ "top": None
85
+ })
86
+
87
+ elif pd.api.types.is_categorical_dtype(df[column]) or pd.api.types.is_object_dtype(df[column]):
88
+ top_value = df[column].mode().iloc[0] if not df[column].mode().empty else None
89
+
90
+ summary.append({
91
+ "column": column,
92
+ "max": None,
93
+ "min": None,
94
+ "count": df[column].count(),
95
+ "nunique": df[column].nunique(),
96
+ "dtype": str(df[column].dtype),
97
+ "top": top_value
98
+ })
99
+ summary_df = pd.DataFrame(summary)
100
+ return summary_df.reset_index(drop=True)
101
 
102
  def format_prompt(df):
103
+ summary = df_summary(df)
 
 
 
 
 
 
 
104
  return prompt_autogenerate.format_prompt(data=df.head().to_json(orient='records'),
105
+ summary=summary.to_json(orient='records'))
106
  def format_user_prompt(df):
107
  return prompt_user_input.format_prompt(data=df.head().to_json(orient='records'))
108