Spaces:
Sleeping
Sleeping
Mustehson
committed on
Commit
·
666a38f
1
Parent(s):
5f9d608
Close Connection
Browse files
app.py
CHANGED
@@ -101,7 +101,7 @@ def create_pipeline(schema):
|
|
101 |
|
102 |
def load_pipeline(table_name):
|
103 |
_conn = duckdb.connect("duckdb_pipeline.duckdb")
|
104 |
-
return _conn.sql(f"SELECT * FROM {table_name} LIMIT 1000").df()
|
105 |
|
106 |
def df_summary(df):
|
107 |
summary = []
|
@@ -147,6 +147,7 @@ def process_inputs(inputs) :
|
|
147 |
def run_llm(messages):
|
148 |
try:
|
149 |
response = llm.invoke(messages)
|
|
|
150 |
tests = json.loads(response.content)
|
151 |
except Exception as e:
|
152 |
return e
|
@@ -246,7 +247,7 @@ def main(table):
|
|
246 |
table_name = create_pipeline(schema)
|
247 |
|
248 |
# Load dlt pipeline
|
249 |
-
df = load_pipeline(table_name)
|
250 |
|
251 |
# df = get_data_df(schema)
|
252 |
df_statistics, df_alerts = statistics(df)
|
@@ -254,7 +255,6 @@ def main(table):
|
|
254 |
|
255 |
messages = format_prompt(df=df)
|
256 |
tests = run_llm(messages)
|
257 |
-
print(tests)
|
258 |
|
259 |
if isinstance(tests, Exception):
|
260 |
tests = pd.DataFrame([{"error": f"❌ Unable to generate tests. {tests}"}])
|
@@ -264,6 +264,7 @@ def main(table):
|
|
264 |
tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
|
265 |
pandera_results = validate_pandera(tests, df)
|
266 |
|
|
|
267 |
return df.head(10), df_statistics, df_alerts, describe_cat, describe_num, tests_df, pandera_results
|
268 |
|
269 |
def user_results(table, text_query):
|
@@ -274,10 +275,9 @@ def user_results(table, text_query):
|
|
274 |
table_name = create_pipeline(schema)
|
275 |
|
276 |
# Load dlt pipeline
|
277 |
-
df = load_pipeline(table_name)
|
278 |
|
279 |
messages = format_user_prompt(df=df, user_description=text_query)
|
280 |
-
tests = run_llm(messages)
|
281 |
|
282 |
print(f'Generated Tests from user input: {tests}')
|
283 |
|
@@ -289,6 +289,8 @@ def user_results(table, text_query):
|
|
289 |
tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
|
290 |
pandera_results = validate_pandera(tests, df)
|
291 |
|
|
|
|
|
292 |
return tests_df, pandera_results
|
293 |
|
294 |
# Custom CSS styling
|
|
|
101 |
|
102 |
def load_pipeline(table_name):
|
103 |
_conn = duckdb.connect("duckdb_pipeline.duckdb")
|
104 |
+
return _conn, _conn.sql(f"SELECT * FROM {table_name} LIMIT 1000").df()
|
105 |
|
106 |
def df_summary(df):
|
107 |
summary = []
|
|
|
147 |
def run_llm(messages):
|
148 |
try:
|
149 |
response = llm.invoke(messages)
|
150 |
+
print(response.content)
|
151 |
tests = json.loads(response.content)
|
152 |
except Exception as e:
|
153 |
return e
|
|
|
247 |
table_name = create_pipeline(schema)
|
248 |
|
249 |
# Load dlt pipeline
|
250 |
+
connection, df = load_pipeline(table_name)
|
251 |
|
252 |
# df = get_data_df(schema)
|
253 |
df_statistics, df_alerts = statistics(df)
|
|
|
255 |
|
256 |
messages = format_prompt(df=df)
|
257 |
tests = run_llm(messages)
|
|
|
258 |
|
259 |
if isinstance(tests, Exception):
|
260 |
tests = pd.DataFrame([{"error": f"❌ Unable to generate tests. {tests}"}])
|
|
|
264 |
tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
|
265 |
pandera_results = validate_pandera(tests, df)
|
266 |
|
267 |
+
connection.close()
|
268 |
return df.head(10), df_statistics, df_alerts, describe_cat, describe_num, tests_df, pandera_results
|
269 |
|
270 |
def user_results(table, text_query):
|
|
|
275 |
table_name = create_pipeline(schema)
|
276 |
|
277 |
# Load dlt pipeline
|
278 |
+
connection, df = load_pipeline(table_name)
|
279 |
|
280 |
messages = format_user_prompt(df=df, user_description=text_query)
|
|
|
281 |
|
282 |
print(f'Generated Tests from user input: {tests}')
|
283 |
|
|
|
289 |
tests_df.rename(columns={tests_df.columns[0]: 'Column', tests_df.columns[1]: 'Rule Name', tests_df.columns[2]: 'Rules' }, inplace=True)
|
290 |
pandera_results = validate_pandera(tests, df)
|
291 |
|
292 |
+
connection.close()
|
293 |
+
|
294 |
return tests_df, pandera_results
|
295 |
|
296 |
# Custom CSS styling
|