yuchenlin committed on
Commit
aa4b227
·
1 Parent(s): 910a6fe

update app

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +125 -25
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ app_ref.py
app.py CHANGED
@@ -1,8 +1,11 @@
1
  import gradio as gr
2
  import random
3
  from datasets import load_dataset
4
-
5
-
 
 
 
6
  # # Sample dataset with unique 10-digit IDs
7
  # qa_dataset = {
8
  # "1234567890": {
@@ -18,20 +21,52 @@ from datasets import load_dataset
18
  # # Add more questions with unique IDs as needed
19
  # }
20
 
21
- truth_data = load_dataset("commonsense-index-dev/commonsense-candidates", "iter6-0520-error", split="train")
 
 
 
 
22
 
23
  qa_dataset = {}
24
  for item in truth_data:
25
  qa_dataset[item["id"]] = {
26
  "question": item["task"],
27
  "choices": item["choices"],
28
- "answer": item["answer"]
29
  }
 
 
 
 
 
 
 
 
 
 
30
 
31
- def get_random_question():
32
- question_id = random.choice(list(qa_dataset.keys()))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  question_data = qa_dataset[question_id]
34
- return question_id, question_data["question"], question_data["choices"]
 
35
 
36
  def get_question_by_id(question_id):
37
  if question_id in qa_dataset:
@@ -40,44 +75,109 @@ def get_question_by_id(question_id):
40
  else:
41
  return None, "Invalid question ID", []
42
 
43
- def check_answer(question_id, choice):
44
  correct_answer = qa_dataset[question_id]["answer"]
45
- return "Correct!" if choice == correct_answer else f"Incorrect. The correct answer is {correct_answer}."
46
-
47
- def load_question(question_id=None):
48
- if question_id:
49
- question_id, question, choices = get_question_by_id(question_id)
50
  else:
51
- question_id, question, choices = get_random_question()
52
-
53
- question = f"## {question}"
 
 
 
54
  choices_markdown = "\n".join(choices)
55
- return question_id, question, choices_markdown, gr.update(visible=True), gr.update(value="", visible=True)
 
 
 
56
 
57
  def show_buttons(choices_markdown):
58
  choices = choices_markdown.split("\n")
59
  visibility = [gr.update(visible=False)] * 10
60
  for i in range(len(choices)):
 
 
61
  visibility[i] = gr.update(visible=True, value=choices[i])
 
62
  return visibility
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  with gr.Blocks() as app:
65
- gr.Markdown("# Multiple Choice QA Dataset Viewer")
 
 
 
 
66
 
67
- question_id_input = gr.Textbox(label="Enter Question ID", placeholder="leave empty for random sampling")
68
- random_button = gr.Button("Retrieve or Random Sample")
69
  question_display = gr.Markdown(visible=True)
70
  choices_markdown = gr.Markdown(visible=False)
71
  choice_buttons = [gr.Button(visible=False) for _ in range(10)]
72
  result_display = gr.Markdown(visible=True)
 
73
 
74
- question_id = gr.State()
75
 
76
- question_id_input.submit(fn=load_question, inputs=question_id_input, outputs=[question_id, question_display, choices_markdown, result_display])
77
- random_button.click(fn=load_question, outputs=[question_id, question_display, choices_markdown, result_display])
78
- choices_markdown.change(fn=show_buttons, inputs=choices_markdown, outputs=choice_buttons)
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  for i, button in enumerate(choice_buttons):
81
- button.click(fn=check_answer, inputs=[question_id, button], outputs=result_display)
82
 
83
  app.launch()
 
1
  import gradio as gr
2
  import random
3
  from datasets import load_dataset
4
+ import json
5
+ import os
6
+ import uuid
7
+ from huggingface_hub import HfApi
8
+ import time
9
  # # Sample dataset with unique 10-digit IDs
10
  # qa_dataset = {
11
  # "1234567890": {
 
21
  # # Add more questions with unique IDs as needed
22
  # }
23
 
# Ground-truth questions shown in the demo.
truth_data = load_dataset("commonsense-index-dev/commonsense-candidates", "iter7-0520", split="train")

# Feedback records submitted so far; update_logs() reloads them periodically.
logs = load_dataset("commonsense-index-dev/DemoFeedback", split="train")

# time.time() stamp of the most recent feedback (re)load.
LAST_LOG_UPDATE = time.time()

# Question index keyed by id: question text, choices, gold answer and reasoning.
qa_dataset = {}
for item in truth_data:
    # Always store a "reason" entry, even when the row has no metadata or the
    # metadata carries no reasoning, so readers can index it unconditionally.
    metadata = (item["metadata"] if "metadata" in item else None) or {}
    qa_dataset[item["id"]] = {
        "question": item["task"],
        "choices": item["choices"],
        "answer": item["answer"],
        "reason": metadata.get("reasoning") or "N/A",
    }
39
+
def update_logs(max_age_seconds=1800):
    """Reload the module-level feedback log when it has become stale.

    Args:
        max_age_seconds: Maximum age of the cached ``logs`` before it is
            reloaded.  Defaults to 1800 seconds (30 minutes).

    Side effects:
        Rebinds the globals ``logs`` and ``LAST_LOG_UPDATE`` when a reload
        happens; otherwise does nothing.
    """
    global LAST_LOG_UPDATE, logs
    if time.time() - LAST_LOG_UPDATE > max_age_seconds:
        logs = load_dataset("commonsense-index-dev/DemoFeedback", split="train")
        # Stamp only after a successful load so a failed reload is retried.
        LAST_LOG_UPDATE = time.time()
47
 
def get_random_question(user_name="Anonymous"):
    """Pick the next question to show to ``user_name``.

    Questions the user has not yet given feedback on are preferred; among
    those, the ones with the fewest feedback entries overall are kept and one
    of them is drawn at random.  If the user has already reviewed every
    question, the draw falls back to the least-reviewed questions overall
    instead of failing on an empty candidate list.

    Args:
        user_name: Name matched against the ``user_name`` field of the
            feedback log to find the questions this user already reviewed.

    Returns:
        A ``(question_id, question, choices, reasoning)`` tuple.

    Raises:
        ValueError: If ``qa_dataset`` is empty.
    """
    update_logs()

    feedback_counts = dict.fromkeys(qa_dataset, 0)
    user_seen_data = set()
    for item in logs:
        logged_id = item["question_id"]
        # The feedback log can name ids that are not in qa_dataset; counting
        # them would raise KeyError, so they are skipped.
        if logged_id not in feedback_counts:
            continue
        feedback_counts[logged_id] += 1
        if item["user_name"] == user_name:
            user_seen_data.add(logged_id)

    unseen = [qid for qid in feedback_counts if qid not in user_seen_data]
    candidates = unseen or list(feedback_counts)
    min_feedback = min(feedback_counts[qid] for qid in candidates)
    question_ids = [qid for qid in candidates if feedback_counts[qid] == min_feedback]
    question_id = random.choice(question_ids)

    question_data = qa_dataset[question_id]
    # Entries built without metadata have no "reason" key.
    reasoning = question_data.get("reason", "N/A")
    return question_id, question_data["question"], question_data["choices"], reasoning
70
 
71
  def get_question_by_id(question_id):
72
  if question_id in qa_dataset:
 
75
  else:
76
  return None, "Invalid question ID", []
77
 
def check_answer(question_id, choice, reasoning):
    """Grade the clicked choice and return the Markdown shown to the user.

    Args:
        question_id: Id of the question currently displayed.
        choice: Label of the clicked button, formatted as ``"A) <text>"``.
        reasoning: Explanation appended to the message when the answer is
            correct.

    Returns:
        A Markdown string: a success message followed by the reasoning, a
        retry prompt, or a hint to sample an example when ``question_id`` is
        not a known question.
    """
    entry = qa_dataset.get(question_id)
    if entry is None:
        # No question loaded yet (or a stale id): nothing to grade.
        return "### Please sample an example first."

    # Drop the three-character "X) " label prefix that show_buttons adds.
    if choice[3:] == entry["answer"]:
        return "### ✅ Correct!" + "\n### Reasoning: " + (reasoning or "N/A")
    return "### ❌ Incorrect. Try again!"
87
+
def load_question(question_id=None, user_name="Anonymous"):
    """Sample a question and build the values that refresh the UI.

    Args:
        question_id: Ignored; a question is always sampled.  Kept so existing
            positional callers keep working.
        user_name: Name used to avoid questions the user already reviewed.
            An empty value is treated as ``"Anonymous"``, which is the name
            submit_feedback records for users who leave the field blank.

    Returns:
        A 7-tuple: sampled question id, question Markdown, newline-joined
        choices, an update clearing the result display, the reasoning text,
        an update resetting the quality selector, and an update re-enabling
        the submit button.
    """
    user_name = user_name or "Anonymous"
    sampled_id, question, choices, reasoning = get_random_question(user_name)
    question_markdown = f"---\n#### QID: {sampled_id}\n## {question} \n---"
    choices_markdown = "\n".join(choices)
    return (
        sampled_id,
        question_markdown,
        choices_markdown,
        gr.update(value="", visible=True),  # clear the previous result
        reasoning,
        gr.update(value="", visible=True),  # reset the quality selector
        gr.update(value="Submit your feedback! 🚀", interactive=True),
    )
96
 
def show_buttons(choices_markdown, max_buttons=10):
    """Turn the newline-joined choices into per-button updates.

    Args:
        choices_markdown: Choices joined with ``"\n"``; empty or ``None``
            means there are no choices to show.
        max_buttons: Number of choice buttons in the UI.  The returned list
            always has exactly this many entries; extra choices are dropped
            instead of raising IndexError.

    Returns:
        A list of ``max_buttons`` updates: the first ones are visible and
        labelled ``"A) ..."``, ``"B) ..."``, ...; the rest are hidden.
    """
    choices = choices_markdown.split("\n") if choices_markdown else []
    updates = [gr.update(visible=False) for _ in range(max_buttons)]
    for i, choice in enumerate(choices[:max_buttons]):
        # chr(65) is "A": label the choices A, B, C, ...
        updates[i] = gr.update(visible=True, value=f"{chr(65 + i)}) {choice}")
    return updates
106
 
107
+
def submit_feedback(question_id, user_reason, example_quality, user_name_text):
    """Upload one feedback record to the feedback dataset repository.

    Args:
        question_id: Id of the question the feedback refers to.
        user_reason: Free-text explanation typed by the user.
        example_quality: Quality rating selected by the user.
        user_name_text: Name typed by the user; blank becomes ``"Anonymous"``.

    Returns:
        A dict mapping ``submit_button`` to an update: a reminder when the
        question id or the rating is missing, otherwise a disabled
        "Submitted" state.

    Raises:
        ValueError: If the ``HF_TOKEN`` environment variable is not set.
    """
    # Both values are empty/None until an example was sampled and rated;
    # testing truthiness first keeps the "N/A" checks from failing on None.
    if (
        not question_id
        or not example_quality
        or "N/A" in question_id
        or "N/A" in example_quality
    ):
        return {
            submit_button: gr.update(
                interactive=True,
                value="Submit your feedback! 🚀 Please sample an example and select a choice!",
            ),
        }

    token = os.getenv("HF_TOKEN")
    if token is None:
        raise ValueError("Hugging Face token not found. Ensure the HF_TOKEN environment variable is set.")

    if not user_name_text:
        user_name_text = "Anonymous"
    feedback_item = {
        "question_id": question_id,
        "user_name": user_name_text,
        "user_reason": user_reason,
        "example_quality": example_quality,
    }
    payload = json.dumps(feedback_item).encode("utf-8")

    # One file per submission, under a random UUID name.
    HfApi().upload_file(
        token=token,
        repo_id="commonsense-index-dev/DemoFeedback",
        repo_type="dataset",
        path_or_fileobj=payload,
        path_in_repo=f"{uuid.uuid4()}.json",
        commit_message=f"{user_name_text}'s feedback on {question_id}",
    )
    return {
        submit_button: gr.update(
            interactive=False,
            value="Submitted! ✅ \n Please sample the next one.",
        ),
    }
147
+
def refresh_feedback(question_id):
    """Reset the feedback form after the displayed question changes.

    Args:
        question_id: New question id; unused, it only triggers the reset.

    Returns:
        Two updates, one per wired output: an emptied reason textbox and an
        emptied quality selector.
    """
    clear_reason = gr.update(value="", visible=True)
    clear_quality = gr.update(value="", visible=True)
    return clear_reason, clear_quality
150
+
# UI layout and event wiring for the viewer.
with gr.Blocks() as app:
    gr.Markdown("# Commonsense Index Data Viewer")

    with gr.Row():
        random_button = gr.Button("🎲 Click here to randomly sample an example")

    question_display = gr.Markdown(visible=True)
    # Hidden carrier for the newline-joined choices; its change event
    # relabels the choice buttons.
    choices_markdown = gr.Markdown(visible=False)
    choice_buttons = [gr.Button(visible=False) for _ in range(10)]
    result_display = gr.Markdown(visible=True)
    # Hidden holder for the reasoning that check_answer reveals.
    reasoning_display = gr.Markdown(visible=False)

    question_id = gr.Textbox(label="Question ID:", interactive=False, visible=False)

    with gr.Row():
        with gr.Column(scale=2):
            reason_textbox = gr.Textbox(
                label="Reason",
                placeholder="Please talk why the correct answer is correct and why the others are wrong. If you think this is a bad example, please explain too.",
                type="text",
                elem_classes="",
                max_lines=5,
                lines=5,
                show_copy_button=False,
                visible=True,
                scale=4,
                interactive=True,
            )
        with gr.Column():
            example_quality = gr.Radio(label="Quality", choices=["Good", "Bad"], interactive=True, visible=True)
            user_name = gr.Textbox(
                label="Your username",
                placeholder="Your username",
                type="text",
                elem_classes="",
                max_lines=1,
                show_copy_button=False,
                visible=True,
                interactive=True,
                show_label=False,
            )
            submit_button = gr.Button("Submit your feedback! 🚀", elem_classes="btn_boderline", visible=True, interactive=True)

    # Inputs are passed positionally, so handing load_question the username
    # directly would bind it to its first parameter (question_id).  Route it
    # to user_name explicitly, treating a blank name as "Anonymous".
    random_button.click(
        fn=lambda name: load_question(user_name=name or "Anonymous"),
        inputs=[user_name],
        outputs=[question_id, question_display, choices_markdown, result_display, reasoning_display, example_quality, submit_button],
    )
    choices_markdown.change(fn=show_buttons, inputs=choices_markdown, outputs=choice_buttons)
    question_id.change(fn=refresh_feedback, inputs=[question_id], outputs=[reason_textbox, example_quality])
    submit_button.click(fn=submit_feedback, inputs=[question_id, reason_textbox, example_quality, user_name], outputs=[submit_button])
    for button in choice_buttons:
        # Each button passes its own label as the chosen answer.
        button.click(fn=check_answer, inputs=[question_id, button, reasoning_display], outputs=result_display)

app.launch()