Liu Yiwen committed on
Commit
5c68062
·
1 Parent(s): a2f1673

新增了意见框、物理含义映射、index轴等功能

Browse files
__pycache__/comm_utils.cpython-311.pyc CHANGED
Binary files a/__pycache__/comm_utils.cpython-311.pyc and b/__pycache__/comm_utils.cpython-311.pyc differ
 
__pycache__/config.cpython-311.pyc CHANGED
Binary files a/__pycache__/config.cpython-311.pyc and b/__pycache__/config.cpython-311.pyc differ
 
__pycache__/utils.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils.cpython-311.pyc and b/__pycache__/utils.cpython-311.pyc differ
 
app.py CHANGED
@@ -226,7 +226,7 @@ def process_salesforce_data(dataset: str, config: str, split: str, page: List[st
226
  tot_samples, tot_targets = max_page, len(df['target'][0]) if isinstance(df['target'][0], np.ndarray) and df['target'][0].dtype == 'O' else 1
227
  if 'all' in sub_targets:
228
  sub_targets = [i for i in range(tot_targets)]
229
- df = clean_up_df(df, sub_targets)
230
  row = df.iloc[0]
231
  id_list.append(row['item_id'])
232
  # 将单行的DataFrame展开为新的DataFrame
@@ -276,11 +276,16 @@ with gr.Blocks() as demo:
276
  # "statistics_textbox": statistics_textbox,
277
  # "user_input_box": user_input_box,
278
  # "plot": plot})
 
 
 
 
 
279
  score_slider = gr.Slider(1, 5, 1, step=0.5, label="Score for answer", interactive=True)
280
  with gr.Row():
281
  with gr.Column(scale=2):
282
  user_submit_button = gr.Button("submit", interactive=True)
283
- user_name_box = gr.Textbox(label="user_name", placeholder="Enter your name first", interactive=True)
284
  with gr.Column(scale=1):
285
  submit_info_box = gr.Textbox(label="submit_info", interactive=False)
286
  with gr.Row():
@@ -401,7 +406,7 @@ with gr.Blocks() as demo:
401
  cp_split.change(show_dataset_at_config_and_split, inputs=[cp_dataset, cp_config, cp_split], outputs=all_outputs)
402
  cp_goto_page.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
403
  cp_goto_next_page.click(show_dataset_at_config_and_split_and_next_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
404
- user_submit_button.click(save_score, inputs=[user_name_box, cp_config, qusetion_id_box, score_slider], outputs=[submit_info_box])
405
  # select_buttom.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, select_sample_box, select_subtarget_box], outputs=all_outputs)
406
 
407
 
@@ -412,3 +417,10 @@ if __name__ == "__main__":
412
  # import subprocess
413
  # subprocess.Popen(["python", "test_server.py"])
414
  uvicorn.run(app, host=host, port=7860)
 
 
 
 
 
 
 
 
226
  tot_samples, tot_targets = max_page, len(df['target'][0]) if isinstance(df['target'][0], np.ndarray) and df['target'][0].dtype == 'O' else 1
227
  if 'all' in sub_targets:
228
  sub_targets = [i for i in range(tot_targets)]
229
+ df = clean_up_df(df, sub_targets, SUBTARGET_MEANING_MAP[config])
230
  row = df.iloc[0]
231
  id_list.append(row['item_id'])
232
  # 将单行的DataFrame展开为新的DataFrame
 
276
  # "statistics_textbox": statistics_textbox,
277
  # "user_input_box": user_input_box,
278
  # "plot": plot})
279
+ with gr.Row():
280
+ with gr.Column(scale=1):
281
+ choose_retain = gr.Dropdown(["delete", "retain", "modify"], label="Choose to retain or delete or modify", interactive=True)
282
+ with gr.Column(scale=2):
283
+ choose_retain_reason_box = gr.Textbox(label="Reason", placeholder="Enter your reason", interactive=True)
284
  score_slider = gr.Slider(1, 5, 1, step=0.5, label="Score for answer", interactive=True)
285
  with gr.Row():
286
  with gr.Column(scale=2):
287
  user_submit_button = gr.Button("submit", interactive=True)
288
+ user_name_box = gr.Textbox(label="user_name", placeholder="Enter your name firstly", interactive=True)
289
  with gr.Column(scale=1):
290
  submit_info_box = gr.Textbox(label="submit_info", interactive=False)
291
  with gr.Row():
 
406
  cp_split.change(show_dataset_at_config_and_split, inputs=[cp_dataset, cp_config, cp_split], outputs=all_outputs)
407
  cp_goto_page.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
408
  cp_goto_next_page.click(show_dataset_at_config_and_split_and_next_page, inputs=[cp_dataset, cp_config, cp_split, cp_page], outputs=all_outputs)
409
+ user_submit_button.click(save_score, inputs=[user_name_box, cp_config, qusetion_id_box, score_slider, choose_retain, choose_retain_reason_box], outputs=[submit_info_box])
410
  # select_buttom.click(show_dataset_at_config_and_split_and_page, inputs=[cp_dataset, cp_config, cp_split, select_sample_box, select_subtarget_box], outputs=all_outputs)
411
 
412
 
 
417
  # import subprocess
418
  # subprocess.Popen(["python", "test_server.py"])
419
  uvicorn.run(app, host=host, port=7860)
420
+
421
+ #// 对一下数据 --
422
+ #// 部署到服务器上
423
+ #// 测试一下功能 --
424
+ #// 加一个选择文本框【删除、保留、修改】,加一个意见的文本框 --
425
+ #// 横坐标增加一个代表index的轴 -
426
+ #// 加一个物理含义的映射 -
comm_utils.py CHANGED
@@ -9,14 +9,16 @@ def save_to_file(user_input):
9
  with open("user_input.txt", "w") as file:
10
  file.write(user_input)
11
 
12
- def save_score(user_id, subset_name, question_id, score):
13
  if user_id.strip() == '':
14
  raise ValueError("User ID cannot be empty.")
15
 
16
  score_data = {
17
  "subset_name": subset_name,
18
  "question_id": question_id,
19
- "score": score
 
 
20
  }
21
 
22
  # 创建用户目录路径
 
9
  with open("user_input.txt", "w") as file:
10
  file.write(user_input)
11
 
12
+ def save_score(user_id, subset_name, question_id, score, retain_option, retain_reason):
13
  if user_id.strip() == '':
14
  raise ValueError("User ID cannot be empty.")
15
 
16
  score_data = {
17
  "subset_name": subset_name,
18
  "question_id": question_id,
19
+ "score": score,
20
+ "retain_option": retain_option,
21
+ "retain_reason": retain_reason
22
  }
23
 
24
  # 创建用户目录路径
config.py CHANGED
@@ -15,4 +15,12 @@ COLUMN_ANSWER = 'answer'
15
  COLUMN_DOMAIN = 'domain'
16
  COLUMN_SOURCE = 'source'
17
  COLUMN_LOCAL_OVERALL = 'local_overall'
18
- COLUMN_OPTIONS = 'options'
 
 
 
 
 
 
 
 
 
15
  COLUMN_DOMAIN = 'domain'
16
  COLUMN_SOURCE = 'source'
17
  COLUMN_LOCAL_OVERALL = 'local_overall'
18
+ COLUMN_OPTIONS = 'options'
19
+
20
+ #subtarget meaning map
21
+ SUBTARGET_MEANING_MAP = {'traffic_hourly' : {0:'Road occupancy rates'},
22
+ 'alibaba_cluster_trace_2018' : {0:'CPU',
23
+ 1:'memory usage'},
24
+ 'era5_2018' : {**{k: 'Relative Humidity' for k in range(10, 17)},
25
+ **{k: 'Temperature' for k in range(24, 31)}}}
26
+ #TODO:不同索引下名称不能相同,否则生成df时会报错
utils.py CHANGED
@@ -35,7 +35,7 @@ def ndarray_to_base64(ndarray):
35
  base64_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
36
  return f"data:image/png;base64,{base64_str}"
37
 
38
- def flatten_ndarray_column(df, column_name, rows_to_include):
39
  """
40
  将嵌套的np.ndarray列展平为多列,并只保留指定的行。
41
  """
@@ -50,7 +50,10 @@ def flatten_ndarray_column(df, column_name, rows_to_include):
50
  selected_data = df[column_name].apply(select_and_flatten)
51
 
52
  for i, index in enumerate(rows_to_include):
53
- df[f'{column_name}_{index}'] = selected_data.apply(lambda x: x[i])
 
 
 
54
 
55
  return df
56
 
@@ -64,7 +67,7 @@ def create_plot(dfs:list[pd.DataFrame], ids:list[str], interval:list[int, int]=N
64
  df = df.iloc[interval[0]:interval[1]]
65
  for i, column in enumerate(df.columns[1:]):
66
  fig.add_trace(go.Scatter(
67
- x=df[df.columns[0]],
68
  y=df[column],
69
  mode='lines',
70
  name=f"item_{df_id} - {column}",
@@ -120,7 +123,7 @@ def create_statistic(dfs: list[pd.DataFrame], ids: list[str], interval:list[int,
120
  combined_stats_df = combined_stats_df.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)
121
  return combined_stats_df
122
 
123
- def clean_up_df(df: pd.DataFrame, rows_to_include: list[int]) -> pd.DataFrame:
124
  """
125
  清理数据集,将嵌套的np.ndarray列展平为多列。
126
  """
@@ -131,7 +134,7 @@ def clean_up_df(df: pd.DataFrame, rows_to_include: list[int]) -> pd.DataFrame:
131
  periods=len(row['target'][0]) if isinstance(row['target'][0], np.ndarray) else len(row['target']),
132
  freq=row['freq']
133
  ).to_pydatetime().tolist(), axis=1)
134
- df = flatten_ndarray_column(df, 'target', rows_to_include)
135
  # 删除原始的start和freq列
136
  df.drop(columns=['start', 'freq', 'target'], inplace=True)
137
  if 'past_feat_dynamic_real' in df.columns:
 
35
  base64_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
36
  return f"data:image/png;base64,{base64_str}"
37
 
38
+ def flatten_ndarray_column(df, column_name, rows_to_include, name_mapping_map:dict|None=None):
39
  """
40
  将嵌套的np.ndarray列展平为多列,并只保留指定的行。
41
  """
 
50
  selected_data = df[column_name].apply(select_and_flatten)
51
 
52
  for i, index in enumerate(rows_to_include):
53
+ if name_mapping_map is not None and index in name_mapping_map:
54
+ df[f'{column_name}_{name_mapping_map[index]}'] = selected_data.apply(lambda x: x[i])
55
+ else:
56
+ df[f'{column_name}_{index}'] = selected_data.apply(lambda x: x[i])
57
 
58
  return df
59
 
 
67
  df = df.iloc[interval[0]:interval[1]]
68
  for i, column in enumerate(df.columns[1:]):
69
  fig.add_trace(go.Scatter(
70
+ x=list(range(len(df[df.columns[0]]))),
71
  y=df[column],
72
  mode='lines',
73
  name=f"item_{df_id} - {column}",
 
123
  combined_stats_df = combined_stats_df.applymap(lambda x: round(x, 2) if isinstance(x, (int, float)) else x)
124
  return combined_stats_df
125
 
126
+ def clean_up_df(df: pd.DataFrame, rows_to_include: list[int], name_mapping_map:dict|None=None) -> pd.DataFrame:
127
  """
128
  清理数据集,将嵌套的np.ndarray列展平为多列。
129
  """
 
134
  periods=len(row['target'][0]) if isinstance(row['target'][0], np.ndarray) else len(row['target']),
135
  freq=row['freq']
136
  ).to_pydatetime().tolist(), axis=1)
137
+ df = flatten_ndarray_column(df, 'target', rows_to_include, name_mapping_map)
138
  # 删除原始的start和freq列
139
  df.drop(columns=['start', 'freq', 'target'], inplace=True)
140
  if 'past_feat_dynamic_real' in df.columns: