Commit 5c68062 · Liu Yiwen committed
Parent(s): a2f1673
Added a feedback box, a physical-meaning mapping, and an index axis, among other features
Files changed:
- __pycache__/comm_utils.cpython-311.pyc +0 -0
- __pycache__/config.cpython-311.pyc +0 -0
- __pycache__/utils.cpython-311.pyc +0 -0
- app.py +15 -3
- comm_utils.py +4 -2
- config.py +9 -1
- utils.py +8 -5
__pycache__/comm_utils.cpython-311.pyc
CHANGED
Binary files a/__pycache__/comm_utils.cpython-311.pyc and b/__pycache__/comm_utils.cpython-311.pyc differ

__pycache__/config.cpython-311.pyc
CHANGED
Binary files a/__pycache__/config.cpython-311.pyc and b/__pycache__/config.cpython-311.pyc differ

__pycache__/utils.cpython-311.pyc
CHANGED
Binary files a/__pycache__/utils.cpython-311.pyc and b/__pycache__/utils.cpython-311.pyc differ
app.py
CHANGED
@@ -226,7 +226,7 @@ def process_salesforce_data(dataset: str, config: str, split: str, page: List[st
     tot_samples, tot_targets = max_page, len(df['target'][0]) if isinstance(df['target'][0], np.ndarray) and df['target'][0].dtype == 'O' else 1
     if 'all' in sub_targets:
         sub_targets = [i for i in range(tot_targets)]
-    df = clean_up_df(df, sub_targets)
+    df = clean_up_df(df, sub_targets, SUBTARGET_MEANING_MAP[config])
     row = df.iloc[0]
     id_list.append(row['item_id'])
     # Expand the single-row DataFrame into a new DataFrame

@@ -276,11 +276,16 @@ with gr.Blocks() as demo:
     #   "statistics_textbox": statistics_textbox,
     #   "user_input_box": user_input_box,
     #   "plot": plot})
+    with gr.Row():
+        with gr.Column(scale=1):
+            choose_retain = gr.Dropdown(["delete", "retain", "modify"], label="Choose to retain or delete or modify", interactive=True)
+        with gr.Column(scale=2):
+            choose_retain_reason_box = gr.Textbox(label="Reason", placeholder="Enter your reason", interactive=True)
     score_slider = gr.Slider(1, 5, 1, step=0.5, label="Score for answer", interactive=True)
     with gr.Row():
         with gr.Column(scale=2):
             user_submit_button = gr.Button("submit", interactive=True)
-            user_name_box = gr.Textbox(label="user_name", placeholder="Enter your name", interactive=True)
+            user_name_box = gr.Textbox(label="user_name", placeholder="Enter your name firstly", interactive=True)
         with gr.Column(scale=1):
             submit_info_box = gr.Textbox(label="submit_info", interactive=False)
     with gr.Row():

@@ -401,7 +406,7 @@ with gr.Blocks() as demo:
     cp_split.change(show_dataset_at_config_and_split, inputs=[cp_dataset, cp_config, cp_split], outputs=all_outputs)
     cp_goto_page.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
     cp_goto_next_page.click(show_dataset_at_config_and_split_and_next_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
-    user_submit_button.click(save_score, inputs=[user_name_box, cp_config, qusetion_id_box, score_slider], outputs=[submit_info_box])
+    user_submit_button.click(save_score, inputs=[user_name_box, cp_config, qusetion_id_box, score_slider, choose_retain, choose_retain_reason_box], outputs=[submit_info_box])
     # select_buttom.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, select_sample_box, select_subtarget_box], outputs=all_outputs)

@@ -412,3 +417,10 @@ if __name__ == "__main__":
     # import subprocess
     # subprocess.Popen(["python", "test_server.py"])
     uvicorn.run(app, host=host, port=7860)
+
+#// Check the data --
+#// Deploy to the server
+#// Test the new features --
+#// Add a choice box [delete / retain / modify] plus a comment box for feedback --
+#// Add an axis representing the index along the x-axis -
+#// Add a mapping to physical meanings -
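The new feedback widgets and the extended click wiring can be exercised on their own. Below is a stripped-down, hypothetical sketch: component names follow the diff, but `save_score_stub`, `cp_config`, and `qusetion_id_box` are stand-ins for parts of the full app that are not shown in this commit.

import gradio as gr

def save_score_stub(user_name, config, question_id, score, retain_option, retain_reason):
    # Stand-in for comm_utils.save_score; it only echoes what would be persisted.
    return f"{user_name} rated {question_id} ({config}): score={score}, {retain_option}: {retain_reason}"

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            choose_retain = gr.Dropdown(["delete", "retain", "modify"],
                                        label="Choose to retain or delete or modify", interactive=True)
        with gr.Column(scale=2):
            choose_retain_reason_box = gr.Textbox(label="Reason", placeholder="Enter your reason", interactive=True)
    score_slider = gr.Slider(1, 5, 1, step=0.5, label="Score for answer", interactive=True)
    with gr.Row():
        with gr.Column(scale=2):
            user_submit_button = gr.Button("submit", interactive=True)
            user_name_box = gr.Textbox(label="user_name", placeholder="Enter your name firstly", interactive=True)
        with gr.Column(scale=1):
            submit_info_box = gr.Textbox(label="submit_info", interactive=False)
    # Hypothetical stand-ins for inputs that come from elsewhere in the real app:
    cp_config = gr.Textbox(label="config", value="era5_2018")
    qusetion_id_box = gr.Textbox(label="question_id", value="q_0001")
    user_submit_button.click(save_score_stub,
                             inputs=[user_name_box, cp_config, qusetion_id_box,
                                     score_slider, choose_retain, choose_retain_reason_box],
                             outputs=[submit_info_box])

if __name__ == "__main__":
    demo.launch()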
comm_utils.py
CHANGED
@@ -9,14 +9,16 @@ def save_to_file(user_input):
     with open("user_input.txt", "w") as file:
         file.write(user_input)

-def save_score(user_id, subset_name, question_id, score):
+def save_score(user_id, subset_name, question_id, score, retain_option, retain_reason):
     if user_id.strip() == '':
         raise ValueError("User ID cannot be empty.")

     score_data = {
         "subset_name": subset_name,
         "question_id": question_id,
-        "score": score
+        "score": score,
+        "retain_option": retain_option,
+        "retain_reason": retain_reason
     }

     # Create the user directory path
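For reference, a minimal, hypothetical call showing how the extended signature lines up with the six Gradio inputs wired in app.py (all argument values below are invented):

# Illustrative only; values are made up.
save_score(
    user_id="alice",                  # from user_name_box
    subset_name="era5_2018",          # from cp_config
    question_id="q_0012",             # from qusetion_id_box
    score=3.5,                        # from score_slider
    retain_option="modify",           # from the new choose_retain dropdown
    retain_reason="trend looks off",  # from the new choose_retain_reason_box
)

# An empty user name is rejected before anything is written:
# save_score("", "era5_2018", "q_0012", 3.5, "retain", "") -> ValueError: User ID cannot be empty.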
config.py
CHANGED
@@ -15,4 +15,12 @@ COLUMN_ANSWER = 'answer'
 COLUMN_DOMAIN = 'domain'
 COLUMN_SOURCE = 'source'
 COLUMN_LOCAL_OVERALL = 'local_overall'
-COLUMN_OPTIONS = 'options'
+COLUMN_OPTIONS = 'options'
+
+# subtarget meaning map
+SUBTARGET_MEANING_MAP = {'traffic_hourly': {0: 'Road occupancy rates'},
+                         'alibaba_cluster_trace_2018': {0: 'CPU',
+                                                        1: 'memory usage'},
+                         'era5_2018': {**{k: 'Relative Humidity' for k in range(10, 17)},
+                                       **{k: 'Temperature' for k in range(24, 31)}}}
+# TODO: names must not repeat across different indices, otherwise building the df will raise an error
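The TODO above matters because flatten_ndarray_column in utils.py (next file) builds column names as f'{column_name}_{name_mapping_map[index]}', so two selected indices that share one label resolve to the same column name; that collision is the root cause the TODO warns about. A small, self-contained illustration with made-up values:

import pandas as pd

# Hypothetical illustration of the TODO: two indices sharing one label
# collapse into one column name, so the first series is silently overwritten.
meaning_map = {10: 'Relative Humidity', 11: 'Relative Humidity'}

df = pd.DataFrame(index=[0])
for index, values in [(10, [0.61]), (11, [0.72])]:
    df[f"target_{meaning_map[index]}"] = values  # same column name both times

print(df.columns.tolist())  # ['target_Relative Humidity'] -- only one column survives
print(df.iloc[0].tolist())  # [0.72] -- the index-10 values are gone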
utils.py
CHANGED
@@ -35,7 +35,7 @@ def ndarray_to_base64(ndarray):
     base64_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
     return f"data:image/png;base64,{base64_str}"

-def flatten_ndarray_column(df, column_name, rows_to_include):
+def flatten_ndarray_column(df, column_name, rows_to_include, name_mapping_map:dict|None=None):
     """
     Flatten a nested np.ndarray column into multiple columns, keeping only the specified rows.
     """

@@ -50,7 +50,10 @@ def flatten_ndarray_column(df, column_name, rows_to_include):
     selected_data = df[column_name].apply(select_and_flatten)

     for i, index in enumerate(rows_to_include):
-        df[f'{column_name}_{index}'] = selected_data.apply(lambda x: x[i])
+        if name_mapping_map is not None and index in name_mapping_map:
+            df[f'{column_name}_{name_mapping_map[index]}'] = selected_data.apply(lambda x: x[i])
+        else:
+            df[f'{column_name}_{index}'] = selected_data.apply(lambda x: x[i])

     return df

@@ -64,7 +67,7 @@ def create_plot(dfs:list[pd.DataFrame], ids:list[str], interval:list[int, int]=N
     df = df.iloc[interval[0]:interval[1]]
     for i, column in enumerate(df.columns[1:]):
         fig.add_trace(go.Scatter(
-            x=df[df.columns[0]],
+            x=list(range(len(df[df.columns[0]]))),
             y=df[column],
             mode='lines',
             name=f"item_{df_id} - {column}",

@@ -120,7 +123,7 @@ def create_statistic(dfs: list[pd.DataFrame], ids: list[str], interval:list[int,
     combined_stats_df = combined_stats_df.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)
     return combined_stats_df

-def clean_up_df(df: pd.DataFrame, rows_to_include: list[int]) -> pd.DataFrame:
+def clean_up_df(df: pd.DataFrame, rows_to_include: list[int], name_mapping_map:dict|None=None) -> pd.DataFrame:
     """
     Clean up the dataset by flattening nested np.ndarray columns into multiple columns.
     """

@@ -131,7 +134,7 @@ def clean_up_df(df: pd.DataFrame, rows_to_include: list[int]) -> pd.DataFrame:
         periods=len(row['target'][0]) if isinstance(row['target'][0], np.ndarray) else len(row['target']),
         freq=row['freq']
     ).to_pydatetime().tolist(), axis=1)
-    df = flatten_ndarray_column(df, 'target', rows_to_include)
+    df = flatten_ndarray_column(df, 'target', rows_to_include, name_mapping_map)
     # Drop the original start and freq columns
     df.drop(columns=['start', 'freq', 'target'], inplace=True)
     if 'past_feat_dynamic_real' in df.columns:
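To see how the new name_mapping_map parameter changes the flattened column names, here is a toy reproduction of just the naming branch added above. The data, the mapping, and the inline loop are invented for illustration; the real function also slices and flattens the nested arrays via select_and_flatten, which this sketch skips.

import numpy as np
import pandas as pd

# One row whose 'target' holds two sub-series (sub-target indices 0 and 1).
df = pd.DataFrame({'item_id': ['demo'],
                   'target': [np.array([[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]])]})

rows_to_include = [0, 1]
name_mapping_map = {0: 'CPU'}  # index 1 deliberately has no physical-meaning label

# Same naming rule as the new if/else branch in flatten_ndarray_column.
for i, index in enumerate(rows_to_include):
    series = df['target'].apply(lambda x: x[i])
    if name_mapping_map is not None and index in name_mapping_map:
        df[f'target_{name_mapping_map[index]}'] = series  # -> 'target_CPU'
    else:
        df[f'target_{index}'] = series                    # -> 'target_1'

print(df.columns.tolist())  # ['item_id', 'target', 'target_CPU', 'target_1']

The index-axis change in create_plot is independent of this mapping: plotting against list(range(len(...))) makes the x-axis show positional indices rather than the values in the first column.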